From 4f5b1beaf619b916390b15377f09132b08ffec2a Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 29 Jun 2021 19:50:25 -0400
Subject: [PATCH 1/7] WIP - start splitting up strings.

---
 mathics/builtin/__init__.py          |    2 +-
 mathics/builtin/string/__init__.py   |    6 +
 mathics/builtin/string/characters.py |  213 +++++
 mathics/builtin/string/operations.py |  781 +++++++++++++++++++
 mathics/builtin/string/patterns.py   |  119 +++
 mathics/builtin/strings.py           | 1068 +-------------------------
 6 files changed, 1125 insertions(+), 1064 deletions(-)
 create mode 100644 mathics/builtin/string/__init__.py
 create mode 100644 mathics/builtin/string/characters.py
 create mode 100644 mathics/builtin/string/operations.py
 create mode 100644 mathics/builtin/string/patterns.py
diff --git a/mathics/builtin/__init__.py b/mathics/builtin/__init__.py
index bf5b0cb38..dd271b8f5 100755
--- a/mathics/builtin/__init__.py
+++ b/mathics/builtin/__init__.py
@@ -153,7 +153,7 @@ def is_builtin(var):
     [] if ENABLE_FILES_MODULE else ["files_io.files", "files_io.importexport"]
 )
 
-for subdir in ("colors", "drawing", "files_io", "numbers", "specialfns", "fileformats"):
+for subdir in ("colors", "drawing", "files_io", "numbers", "specialfns", "string", "fileformats"):
     import_name = f"{__name__}.{subdir}"
 
     if import_name in disable_file_module_names:
diff --git a/mathics/builtin/string/__init__.py b/mathics/builtin/string/__init__.py
new file mode 100644
index 000000000..9b776f908
--- /dev/null
+++ b/mathics/builtin/string/__init__.py
@@ -0,0 +1,6 @@
+"""
+Strings and Characters
+
+"""
+
+from mathics.version import __version__  # noqa used in loading to check consistency.
diff --git a/mathics/builtin/string/characters.py b/mathics/builtin/string/characters.py
new file mode 100644
index 000000000..78fae72aa
--- /dev/null
+++ b/mathics/builtin/string/characters.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+"""
+Characters in Strings
+"""
+
+from mathics.version import __version__  # noqa used in loading to check consistency.
+
+from mathics.builtin.base import Builtin, Test
+
+from mathics.core.expression import (
+    Expression,
+    String,
+    SymbolList,
+)
+
+
+class Characters(Builtin):
+    """
+    <dl>
+    <dt>'Characters["$string$"]'
+        <dd>gives the characters of $string$ as a list of one-character strings.
+    </dl>
+
+    >> Characters["abc"]
+     = {a, b, c}
+
+    #> \\.78\\.79\\.7A
+     = xyz
+
+    #> \\:0078\\:0079\\:007A
+     = xyz
+
+    #> \\101\\102\\103\\061\\062\\063
+     = ABC123
+
+    #> \\[Alpha]\\[Beta]\\[Gamma]
+     = \u03B1\u03B2\u03B3
+    """
+
+    attributes = ("Listable",)
+
+    def apply(self, string, evaluation):
+        "Characters[string_String]"
+        # Iterating over the string value yields its characters one at a time.
+        return Expression(SymbolList, *map(String, string.value))
+
+
+class CharacterRange(Builtin):
+    """
+    <dl>
+    <dt>'CharacterRange["$a$", "$b$"]'
+        <dd>returns a list of the Unicode characters from $a$ to $b$
+        inclusive.
+    </dl>
+
+    >> CharacterRange["a", "e"]
+     = {a, b, c, d, e}
+    >> CharacterRange["b", "a"]
+     = {}
+    """
+
+    attributes = ("ReadProtected",)
+
+    messages = {
+        "argtype": "Arguments `1` and `2` are not both strings of length 1.",
+    }
+
+    def apply(self, start, stop, evaluation):
+        "CharacterRange[start_String, stop_String]"
+
+        first, last = start.value, stop.value
+        if not (len(first) == 1 and len(last) == 1):
+            evaluation.message("CharacterRange", "argtype", start, stop)
+            return
+        # When the first code point is the larger one the range is empty.
+        code_points = range(ord(first), ord(last) + 1)
+        return Expression(SymbolList, *(String(chr(code)) for code in code_points))
+
+
+class DigitQ(Builtin):
+    """
+    <dl>
+    <dt>'DigitQ[$string$]'
+        <dd>yields 'True' if all the characters in the $string$ are digits, and yields 'False' otherwise.
+    </dl>
+
+    >> DigitQ["9"]
+     = True
+
+    >> DigitQ["a"]
+     = False
+
+    >> DigitQ["01001101011000010111010001101000011010010110001101110011"]
+     = True
+
+    >> DigitQ["-123456789"]
+     = False
+
+    """
+
+    rules = {  # an argument that is not a string gives False
+        "DigitQ[string_]": (
+            "If[StringQ[string], StringMatchQ[string, DigitCharacter...], False, False]"
+        ),
+    }
+
+
+class LetterQ(Builtin):
+    """
+    <dl>
+    <dt>'LetterQ[$string$]'
+        <dd>yields 'True' if all the characters in the $string$ are letters, and yields 'False' otherwise.
+    </dl>
+
+    >> LetterQ["m"]
+     = True
+
+    >> LetterQ["9"]
+     = False
+
+    >> LetterQ["Mathics"]
+     = True
+
+    >> LetterQ["Welcome to Mathics"]
+     = False
+
+    #> LetterQ[""]
+     = True
+
+    #> LetterQ["\\[Alpha]\\[Beta]\\[Gamma]\\[Delta]\\[Epsilon]\\[Zeta]\\[Eta]\\[Theta]"]
+     = True
+    """
+
+    rules = {  # an argument that is not a string gives False
+        "LetterQ[string_]": (
+            "If[StringQ[string], StringMatchQ[string, LetterCharacter...], False, False]"
+        ),
+    }
+
+
+class LowerCaseQ(Test):
+    """
+    <dl>
+    <dt>'LowerCaseQ[$s$]'
+        <dd>returns True if $s$ is a string whose characters are all lower case.
+    </dl>
+
+    >> LowerCaseQ["abc"]
+     = True
+
+    An empty string returns True.
+    >> LowerCaseQ[""]
+     = True
+    """
+
+    def test(self, s):
+        return isinstance(s, String) and all(map(str.islower, s.get_string_value()))
+
+
+class ToLowerCase(Builtin):
+    """
+    <dl>
+    <dt>'ToLowerCase[$s$]'
+        <dd>gives a copy of $s$ in which every letter is lower case.
+    </dl>
+
+    >> ToLowerCase["New York"]
+     = new york
+    """
+
+    attributes = ("Listable", "Protected")
+
+    def apply(self, s, evaluation):
+        "ToLowerCase[s_String]"
+        return String(str.lower(s.get_string_value()))
+
+
+class ToUpperCase(Builtin):
+    """
+    <dl>
+    <dt>'ToUpperCase[$s$]'
+        <dd>gives a copy of $s$ in which every letter is upper case.
+    </dl>
+
+    >> ToUpperCase["New York"]
+     = NEW YORK
+    """
+
+    attributes = ("Listable", "Protected")
+
+    def apply(self, s, evaluation):
+        "ToUpperCase[s_String]"
+        return String(str.upper(s.get_string_value()))
+
+
+class UpperCaseQ(Test):
+    """
+    <dl>
+    <dt>'UpperCaseQ[$s$]'
+        <dd>returns True if $s$ is a string whose characters are all upper case.
+    </dl>
+
+    >> UpperCaseQ["ABC"]
+     = True
+
+    An empty string returns True.
+    >> UpperCaseQ[""]
+     = True
+    """
+
+    def test(self, s):
+        return isinstance(s, String) and all(map(str.isupper, s.get_string_value()))
diff --git a/mathics/builtin/string/operations.py b/mathics/builtin/string/operations.py
new file mode 100644
index 000000000..9a68b2bd9
--- /dev/null
+++ b/mathics/builtin/string/operations.py
@@ -0,0 +1,781 @@
+# -*- coding: utf-8 -*-
+
+"""
+Operations on Strings
+"""
+
+import re
+from sys import version_info
+from binascii import hexlify, unhexlify
+from heapq import heappush, heappop
+
+from mathics.version import __version__  # noqa used in loading to check consistency.
+
+from mathics.builtin.base import (
+    BinaryOperator,
+    Builtin,
+)
+from mathics.core.expression import (
+    Expression,
+    Symbol,
+    SymbolFalse,
+    SymbolTrue,
+    SymbolList,
+    String,
+    Integer,
+    Integer1,
+    from_python,
+)
+from mathics.builtin.lists import python_seq, convert_seq
+from mathics.builtin.strings import (
+    _StringFind,
+    _decode_pname,
+    _encode_pname,
+    _evaluate_match,
+    _parallel_match,
+    to_regex,
+)
+
+
+class StringDrop(Builtin):
+    """
+    <dl>
+    <dt>'StringDrop["$string$", $n$]'
+        <dd>gives $string$ with the first $n$ characters dropped.
+    <dt>'StringDrop["$string$", -$n$]'
+        <dd>gives $string$ with the last $n$ characters dropped.
+    <dt>'StringDrop["$string$", {$n$}]'
+        <dd>gives $string$ with the $n$th character dropped.
+    <dt>'StringDrop["$string$", {$m$, $n$}]'
+        <dd>gives $string$ with the characters $m$ through $n$ dropped.
+    </dl>
+
+    >> StringDrop["abcde", 2]
+    = cde
+    >> StringDrop["abcde", -2]
+    = abc
+    >> StringDrop["abcde", {2}]
+    = acde
+    >> StringDrop["abcde", {2,3}]
+    = ade
+    >> StringDrop["abcd",{3,2}]
+    = abcd
+    >> StringDrop["abcd",0]
+    = abcd
+    """
+
+    messages = {
+        "strse": "String expected at position 1.",
+        "mseqs": "Integer or list of two Integers are expected at position 2.",
+        "drop": 'Cannot drop positions `1` through `2` in "`3`".',
+    }
+
+    def apply_with_n(self, string, n, evaluation):
+        "StringDrop[string_,n_Integer]"
+        if not isinstance(string, String):
+            return evaluation.message("StringDrop", "strse")
+        text = string.get_string_value()
+        count = n.value
+        # The pattern only lets Integers through, so just the range is checked.
+        if count > len(text):
+            return evaluation.message("StringDrop", "drop", 1, count, string)
+        if count < -len(text):
+            return evaluation.message("StringDrop", "drop", count, -1, string)
+        if count > 0:
+            return String(text[count:])
+        if count < 0:
+            return String(text[:count])
+        # Dropping zero characters leaves the string untouched.
+        return string
+
+    def apply_with_ni_nf(self, string, ni, nf, evaluation):
+        "StringDrop[string_,{ni_Integer,nf_Integer}]"
+        if not isinstance(string, String):
+            return evaluation.message("StringDrop", "strse")
+        fullstring = string.get_string_value()
+        lenfullstring = len(fullstring)
+        # Negative positions count from the end of the string.
+        posi = ni.value
+        if posi < 0:
+            posi = lenfullstring + posi + 1
+        posf = nf.value
+        if posf < 0:
+            posf = lenfullstring + posf + 1
+        if posf > lenfullstring or posi > lenfullstring or posf <= 0 or posi <= 0:
+            # Positions are 1-based, so 0 is rejected here together with anything
+            # past either end. The string is always passed because the "drop"
+            # message has three placeholders.
+            return evaluation.message("StringDrop", "drop", ni, nf, fullstring)
+        if posf < posi:
+            return string  # this is what actually mma does
+        return String(fullstring[: (posi - 1)] + fullstring[posf:])
+
+    def apply_with_ni(self, string, ni, evaluation):
+        "StringDrop[string_,{ni_Integer}]"
+        if not isinstance(string, String):
+            return evaluation.message("StringDrop", "strse")
+        fullstring = string.get_string_value()
+        lenfullstring = len(fullstring)
+        posi = ni.value
+        if posi < 0:
+            # Negative positions count from the end of the string.
+            posi = lenfullstring + posi + 1
+        if posi > lenfullstring or posi <= 0:
+            # Covers position 0 too; the string fills the third placeholder.
+            return evaluation.message("StringDrop", "drop", ni, ni, fullstring)
+        return String(fullstring[: (posi - 1)] + fullstring[posi:])
+
+    def apply(self, string, something, evaluation):
+        "StringDrop[string_,something___]"
+        # Catch-all pattern: report which of the two arguments is malformed.
+        tag = "mseqs" if isinstance(string, String) else "strse"
+        return evaluation.message("StringDrop", tag)
+
+
+class StringInsert(Builtin):
+    """
+    <dl>
+      <dt>'StringInsert["$string$", "$snew$", $n$]'
+      <dd>yields a string with $snew$ inserted starting at position $n$ in $string$.
+
+      <dt>'StringInsert["$string$", "$snew$", -$n$]'
+      <dd>inserts $snew$ at position $n$ from the end of $string$.
+
+      <dt>'StringInsert["$string$", "$snew$", {$n_1$, $n_2$, ...}]'
+      <dd>inserts a copy of $snew$ at each position $n_i$ in $string$;
+        the $n_i$ are taken before any insertion is done.
+
+      <dt>'StringInsert[{$s_1$, $s_2$, ...}, "$snew$", $n$]'
+      <dd>gives the list of results for each of the $s_i$.
+    </dl>
+
+    >> StringInsert["noting", "h", 4]
+     = nothing
+
+    #> StringInsert["abcdefghijklm", "X", 15]
+     : Cannot insert at position 15 in abcdefghijklm.
+     = StringInsert[abcdefghijklm, X, 15]
+
+    #> StringInsert[abcdefghijklm, "X", 4]
+     : String or list of strings expected at position 1 in StringInsert[abcdefghijklm, X, 4].
+     = StringInsert[abcdefghijklm, X, 4]
+
+    #> StringInsert["abcdefghijklm", X, 4]
+     : String expected at position 2 in StringInsert[abcdefghijklm, X, 4].
+     = StringInsert[abcdefghijklm, X, 4]
+
+    #> StringInsert["abcdefghijklm", "X", a]
+     : Position specification a in StringInsert[abcdefghijklm, X, a] is not a machine-sized integer or a list of machine-sized integers.
+     = StringInsert[abcdefghijklm, X, a]
+
+    #> StringInsert["abcdefghijklm", "X", 0]
+     : Cannot insert at position 0 in abcdefghijklm.
+     =  StringInsert[abcdefghijklm, X, 0]
+
+    >> StringInsert["note", "d", -1]
+     = noted
+
+    >> StringInsert["here", "t", -5]
+     = there
+
+    #> StringInsert["abcdefghijklm", "X", -15]
+     : Cannot insert at position -15 in abcdefghijklm.
+     = StringInsert[abcdefghijklm, X, -15]
+
+    >> StringInsert["adac", "he", {1, 5}]
+     = headache
+
+    #> StringInsert["abcdefghijklm", "X", {1, -1, 14, -14}]
+     = XXabcdefghijklmXX
+
+    #> StringInsert["abcdefghijklm", "X", {1, 0}]
+     : Cannot insert at position 0 in abcdefghijklm.
+     = StringInsert[abcdefghijklm, X, {1, 0}]
+
+    #> StringInsert["", "X", {1}]
+     = X
+
+    #> StringInsert["", "X", {1, -1}]
+     = XX
+
+    #> StringInsert["", "", {1}]
+     = #<--#
+
+    #> StringInsert["", "X", {1, 2}]
+     : Cannot insert at position 2 in .
+     = StringInsert[, X, {1, 2}]
+
+    #> StringInsert["abcdefghijklm", "", {1, 2, 3, 4 ,5, -6}]
+     = abcdefghijklm
+
+    #> StringInsert["abcdefghijklm", "X", {}]
+     = abcdefghijklm
+
+    >> StringInsert[{"something", "sometimes"}, " ", 5]
+     = {some thing, some times}
+
+    #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", 13]
+     : Cannot insert at position 13 in Mathics.
+     = {abcdefghijklXm, StringInsert[Mathics, X, 13]}
+
+    #> StringInsert[{"", ""}, "", {1, 1, 1, 1}]
+     = {, }
+
+    #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", {0, 2}]
+     : Cannot insert at position 0 in abcdefghijklm.
+     : Cannot insert at position 0 in Mathics.
+     = {StringInsert[abcdefghijklm, X, {0, 2}], StringInsert[Mathics, X, {0, 2}]}
+
+    #> StringInsert[{"abcdefghijklm", Mathics}, "X", {1, 2}]
+     : String or list of strings expected at position 1 in StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}].
+     = StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}]
+
+    #> StringInsert[{"", "Mathics"}, "X", {1, 1, -1}]
+     = {XXX, XXMathicsX}
+
+    >> StringInsert["1234567890123456", ".", Range[-16, -4, 3]]
+     = 1.234.567.890.123.456"""
+
+    messages = {
+        "strse": "String or list of strings expected at position `1` in `2`.",
+        "string": "String expected at position `1` in `2`.",
+        "ins": "Cannot insert at position `1` in `2`.",
+        "psl": "Position specification `1` in `2` is not a machine-sized integer or a list of machine-sized integers.",
+    }
+
+    def _insert(self, text, add, lpos, evaluation):
+        size = len(text)
+        # Refuse the whole request as soon as one position is out of range.
+        for pos in lpos:
+            if not 1 <= abs(pos) <= size + 1:
+                evaluation.message("StringInsert", "ins", Integer(pos), String(text))
+                # Return the formatted, unevaluated call in place of a result.
+                shown_pos = lpos[0] if len(lpos) == 1 else lpos
+                return evaluation.format_output(
+                    Expression("StringInsert", text, add, shown_pos)
+                )
+
+        # Turn every position into a 0-based cut point, in ascending order;
+        # negative positions count from the end of the string.
+        cuts = sorted((p if p > 0 else size + 2 + p) - 1 for p in lpos)
+
+        # A copy of `add` follows each slice that ends at a cut point.
+        pieces = []
+        previous = 0
+        for cut in cuts:
+            pieces.append(text[previous:cut] + add)
+            previous = cut
+        pieces.append(text[previous:size])
+
+        return "".join(pieces)
+
+    def apply(self, strsource, strnew, pos, evaluation):
+        "StringInsert[strsource_, strnew_, pos_]"
+
+        # exp is the call as given; it is only used to fill in messages.
+        exp = Expression("StringInsert", strsource, strnew, pos)
+
+        # Argument 2: the text to insert has to be a string.
+        py_strnew = strnew.get_string_value()
+        if py_strnew is None:
+            return evaluation.message("StringInsert", "string", Integer(2), exp)
+
+        # Argument 3: either one integer or a list of integers.
+        if pos.has_form("List", None):
+            pos_items = pos.get_leaves()
+            if not pos_items:
+                # No positions means no insertions: the first argument is
+                # returned as it is, without being checked.
+                return strsource
+        else:
+            pos_items = [pos]
+
+        # None marks an entry that cannot be used as an integer position.
+        listpos = [item.get_int_value() for item in pos_items]
+        if None in listpos:
+            return evaluation.message("StringInsert", "psl", pos, exp)
+
+        # Argument 1: either one string or a list of strings. A lone string is
+        # handled as a one-element list and unwrapped again at the end.
+        is_list = strsource.has_form("List", None)
+        sources = strsource.leaves if is_list else [strsource]
+        py_sources = [source.get_string_value() for source in sources]
+        if None in py_sources:
+            return evaluation.message("StringInsert", "strse", Integer1, exp)
+
+        # _insert reports positions that do not fit a particular string itself,
+        # so one bad element does not prevent the others from being processed.
+        results = [
+            String(self._insert(py_source, py_strnew, listpos, evaluation))
+            for py_source in py_sources
+        ]
+
+        if is_list:
+            return Expression("List", *results)
+        return results[0]
+
+
+class StringJoin(BinaryOperator):
+    """
+    <dl>
+    <dt>'StringJoin["$s1$", "$s2$", ...]'
+        <dd>returns the concatenation of the strings $s1$, $s2$, ...
+    </dl>
+
+    >> StringJoin["a", "b", "c"]
+     = abc
+    >> "a" <> "b" <> "c" // InputForm
+     = "abc"
+
+    'StringJoin' flattens lists out:
+    >> StringJoin[{"a", "b"}] // InputForm
+     = "ab"
+    >> Print[StringJoin[{"Hello", " ", {"world"}}, "!"]]
+     | Hello world!
+    """
+
+    operator = "<>"
+    precedence = 600
+    attributes = ("Flat", "OneIdentity")
+
+    def apply(self, items, evaluation):
+        "StringJoin[items___]"
+
+        # Nested lists are flattened so that only their elements remain.
+        flat = items.flatten(SymbolList)
+        if flat.get_head_name() == "System`List":
+            parts = flat.leaves
+        else:
+            parts = flat.get_sequence()
+
+        # Anything that is not a string makes the whole join fail.
+        if not all(isinstance(part, String) for part in parts):
+            evaluation.message("StringJoin", "string")
+            return
+        return String("".join(part.value for part in parts))
+
+
+class StringLength(Builtin):
+    """
+    <dl>
+    <dt>'StringLength["$string$"]'
+        <dd>gives the number of characters in $string$.
+    </dl>
+
+    >> StringLength["abc"]
+     = 3
+    'StringLength' is listable:
+    >> StringLength[{"a", "bc"}]
+     = {1, 2}
+
+    >> StringLength[x]
+     : String expected.
+     = StringLength[x]
+    """
+
+    attributes = ("Listable",)
+
+    def apply(self, str, evaluation):
+        "StringLength[str_]"
+
+        if isinstance(str, String):
+            return Integer(len(str.value))
+        evaluation.message("StringLength", "string")
+        return None
+
+
+class StringPosition(Builtin):
+    """
+    <dl>
+    <dt>'StringPosition["$string$", $patt$]'
+      <dd>gives a list of starting and ending positions where $patt$ matches "$string$".
+    <dt>'StringPosition["$string$", $patt$, $n$]'
+      <dd>returns the first $n$ matches only.
+    <dt>'StringPosition["$string$", {$patt1$, $patt2$, ...}, $n$]'
+      <dd>matches multiple patterns.
+    <dt>'StringPosition[{$s1$, $s2$, ...}, $patt$]'
+      <dd>returns a list of matches for multiple strings.
+    </dl>
+
+    >> StringPosition["123ABCxyABCzzzABCABC", "ABC"]
+     = {{4, 6}, {9, 11}, {15, 17}, {18, 20}}
+
+    >> StringPosition["123ABCxyABCzzzABCABC", "ABC", 2]
+     = {{4, 6}, {9, 11}}
+
+    'StringPosition' can be useful for searching through text.
+    >> data = Import["ExampleData/EinsteinSzilLetter.txt"];
+    >> StringPosition[data, "uranium"]
+     = {{299, 305}, {870, 876}, {1538, 1544}, {1671, 1677}, {2300, 2306}, {2784, 2790}, {3093, 3099}}
+
+    #> StringPosition["123ABCxyABCzzzABCABC", "ABC", -1]
+     : Non-negative integer or Infinity expected at position 3 in StringPosition[123ABCxyABCzzzABCABC, ABC, -1].
+     = StringPosition[123ABCxyABCzzzABCABC, ABC, -1]
+
+    ## Overlaps
+    #> StringPosition["1231221312112332", RegularExpression["[12]+"]]
+     = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}}
+    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> False]
+     = {{1, 2}, {4, 7}, {9, 13}, {16, 16}}
+    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> x]
+     = {{1, 2}, {4, 7}, {9, 13}, {16, 16}}
+    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> All]
+     : Overlaps -> All option is not currently implemented in Mathics.
+     = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}}
+
+    #> StringPosition["21211121122", {"121", "11"}]
+     = {{2, 4}, {4, 5}, {5, 6}, {6, 8}, {8, 9}}
+    #> StringPosition["21211121122", {"121", "11"}, Overlaps -> False]
+     = {{2, 4}, {5, 6}, {8, 9}}
+
+    #> StringPosition[{"abc", "abcda"}, "a"]
+     = {{{1, 1}}, {{1, 1}, {5, 5}}}
+
+    #> StringPosition[{"abc"}, "a", Infinity]
+     = {{{1, 1}}}
+
+    #> StringPosition["abc"]["123AabcDEabc"]
+     = {{5, 7}, {10, 12}}
+    """
+
+    options = {
+        "IgnoreCase": "False",
+        "MetaCharacters": "None",
+        "Overlaps": "True",
+    }
+
+    messages = {
+        "strse": "String or list of strings expected at position `1` in `2`.",
+        "overall": "Overlaps -> All option is not currently implemented in Mathics.",
+        "innf": "Non-negative integer or Infinity expected at position `2` in `1`.",
+    }
+
+    rules = {
+        "StringPosition[patt_][s_]": "StringPosition[s, patt]",
+    }
+
+    def apply(self, string, patt, evaluation, options):
+        "StringPosition[string_, patt_, OptionsPattern[StringPosition]]"
+
+        return self.apply_n(
+            string,
+            patt,
+            Expression("DirectedInfinity", Integer1),
+            evaluation,
+            options,
+        )
+
+    def apply_n(self, string, patt, n, evaluation, options):
+        "StringPosition[string_, patt_, n:(_Integer|DirectedInfinity[1]), OptionsPattern[StringPosition]]"
+
+        expr = Expression("StringPosition", string, patt, n)
+
+        # check n
+        if n.has_form("DirectedInfinity", 1):
+            py_n = float("inf")
+        else:
+            py_n = n.get_int_value()
+            if py_n is None or py_n < 0:
+                return evaluation.message("StringPosition", "innf", expr, Integer(3))
+
+        # check options
+        overlaps_option = options["System`Overlaps"]
+        if overlaps_option == Symbol("All"):
+            # TODO
+            evaluation.message("StringPosition", "overall")
+        # unknown options are treated as False
+        overlap = overlaps_option == SymbolTrue or overlaps_option == Symbol("All")
+
+        # convert patterns
+        if patt.has_form("List", None):
+            patts = patt.get_leaves()
+        else:
+            patts = [patt]
+        re_patts = []
+        for p in patts:
+            py_p = to_regex(p, evaluation)
+            if py_p is None:
+                return evaluation.message("StringExpression", "invld", p, patt)
+            re_patts.append(py_p)
+        compiled_patts = [re.compile(re_patt) for re_patt in re_patts]
+
+        # string or list of strings
+        if string.has_form("List", None):
+            py_strings = [s.get_string_value() for s in string.leaves]
+            if None in py_strings:
+                return
+            results = [
+                self.do_apply(py_string, compiled_patts, py_n, overlap)
+                for py_string in py_strings
+            ]
+            return Expression(SymbolList, *results)
+        else:
+            py_string = string.get_string_value()
+            if py_string is None:
+                return
+            return self.do_apply(py_string, compiled_patts, py_n, overlap)
+
+    @staticmethod
+    def do_apply(py_string, compiled_patts, py_n, overlap):
+        result = []
+        if py_n <= 0:
+            # Asking for zero matches gives an empty list.
+            return from_python(result)
+        start = 0
+        while start < len(py_string):
+            begin = start
+            for compiled_patt in compiled_patts:
+                m = compiled_patt.match(py_string, start)
+                if m is None:
+                    continue
+                result.append([m.start() + 1, m.end()])  # 0 to 1 based indexing
+                if len(result) == py_n:
+                    return from_python(result)
+                if not overlap:
+                    start = m.end()
+            # Step forward whenever the scan did not move; this also keeps a
+            # zero-length match with Overlaps -> False from repeating forever.
+            if start == begin:
+                start += 1
+        return from_python(result)
+
+
+class StringReplace(_StringFind):
+    """
+    <dl>
+    <dt>'StringReplace["$string$", "$a$"->"$b$"]'
+        <dd>replaces each occurrence of $a$ with $b$ in $string$.
+    <dt>'StringReplace["$string$", {"$s1$"->"$sp1$", "$s2$"->"$sp2$"}]'
+        <dd>performs multiple replacements of each $si$ by the
+        corresponding $spi$ in $string$.
+    <dt>'StringReplace["$string$", $srules$, $n$]'
+        <dd>only performs the first $n$ replacements.
+    <dt>'StringReplace[{"$string1$", "$string2$", ...}, $srules$]'
+        <dd>performs the replacements specified by $srules$ on a list
+        of strings.
+    </dl>
+
+    StringReplace replaces all occurrences of one substring with another:
+    >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A"]
+     = AAAyyxxAyA
+
+    Multiple replacements can be supplied:
+    >> StringReplace["xyzwxyzwxxyzxyzw", {"xyz" -> "A", "w" -> "BCD"}]
+     = ABCDABCDxAABCD
+
+    Only replace the first 2 occurrences:
+    >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A", 2]
+     = AAxyyyxxxyyxy
+
+    Also works for multiple rules:
+    >> StringReplace["abba", {"a" -> "A", "b" -> "B"}, 2]
+     = ABba
+
+    StringReplace acts on lists of strings too:
+    >> StringReplace[{"xyxyxxy", "yxyxyxxxyyxy"}, "xy" -> "A"]
+     = {AAxA, yAAxxAyA}
+
+    #> StringReplace["abcabc", "a" -> "b", Infinity]
+     = bbcbbc
+    #> StringReplace[x, "a" -> "b"]
+     : String or list of strings expected at position 1 in StringReplace[x, a -> b].
+     = StringReplace[x, a -> b]
+    #> StringReplace["xyzwxyzwaxyzxyzw", x]
+     : x is not a valid string replacement rule.
+     = StringReplace[xyzwxyzwaxyzxyzw, x]
+    #> StringReplace["xyzwxyzwaxyzxyzw", x -> y]
+     : Element x is not a valid string or pattern element in x.
+     = StringReplace[xyzwxyzwaxyzxyzw, x -> y]
+    #> StringReplace["abcabc", "a" -> "b", -1]
+     : Non-negative integer or Infinity expected at position 3 in StringReplace[abcabc, a -> b, -1].
+     = StringReplace[abcabc, a -> b, -1]
+    #> StringReplace["abc", "b" -> 4]
+     : String expected.
+     = a <> 4 <> c
+
+    #> StringReplace["01101100010", "01" .. -> "x"]
+     = x1x100x0
+
+    #> StringReplace["abc abcb abdc", "ab" ~~ _ -> "X"]
+     = X Xb Xc
+
+    #> StringReplace["abc abcd abcd",  WordBoundary ~~ "abc" ~~ WordBoundary -> "XX"]
+     = XX abcd abcd
+
+    #> StringReplace["abcd acbd", RegularExpression["[ab]"] -> "XX"]
+     = XXXXcd XXcXXd
+
+    #> StringReplace["abcd acbd", RegularExpression["[ab]"] ~~ _ -> "YY"]
+     = YYcd YYYY
+
+    #> StringReplace["abcdabcdaabcabcd", {"abc" -> "Y", "d" -> "XXX"}]
+     = YXXXYXXXaYYXXX
+
+
+    #> StringReplace["  Have a nice day.  ", (StartOfString ~~ Whitespace) | (Whitespace ~~ EndOfString) -> ""] // FullForm
+     = "Have a nice day."
+
+    #> StringReplace["xyXY", "xy" -> "01"]
+     = 01XY
+    #> StringReplace["xyXY", "xy" -> "01", IgnoreCase -> True]
+     = 0101
+
+    StringReplace also can be used as an operator:
+    >> StringReplace["y" -> "ies"]["city"]
+     = cities
+    """
+
+    # TODO Special Characters
+    """
+    #> StringReplace["product: A \\[CirclePlus] B" , "\\[CirclePlus]" -> "x"]
+     = A x B
+    """
+
+    rules = {
+        "StringReplace[rule_][string_]": "StringReplace[string, rule]",
+    }
+
+    def _find(self, py_stri, py_rules, py_n, flags, evaluation):
+        # Alternate the untouched stretches of the input with the replacements.
+        pieces = []
+        cursor = 0
+        for match, form in _parallel_match(py_stri, py_rules, flags, py_n):
+            begin, end = match.span()
+            if begin > cursor:
+                pieces.append(String(py_stri[cursor:begin]))
+            pieces.append(_evaluate_match(form, match, evaluation))
+            cursor = end
+        if cursor < len(py_stri):
+            pieces.append(String(py_stri[cursor:]))
+        return Expression("StringJoin", *pieces)
+
+    def apply(self, string, rule, n, evaluation, options):
+        "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]"
+        # this pattern is a slight hack to get around missing Shortest/Longest.
+        return self._apply(string, rule, n, evaluation, options, False)
+
+
+class StringReverse(Builtin):
+    """
+    <dl>
+      <dt>'StringReverse["$string$"]'
+      <dd>gives the characters of $string$ in reverse order.
+      </dl>
+
+      >> StringReverse["live"]
+       = evil
+    """
+
+    attributes = ("Listable", "Protected")
+
+    def apply(self, string, evaluation):
+        "StringReverse[string_String]"
+        return String("".join(reversed(string.get_string_value())))
+
+
+class StringTake(Builtin):
+    """
+    <dl>
+      <dt>'StringTake["$string$", $n$]'
+      <dd>gives the first $n$ characters in $string$.
+
+      <dt>'StringTake["$string$", -$n$]'
+      <dd>gives the last $n$ characters in $string$.
+
+      <dt>'StringTake["$string$", {$n$}]'
+      <dd>gives the $n$th character in $string$.
+
+      <dt>'StringTake["$string$", {$m$, $n$}]'
+      <dd>gives characters $m$ through $n$ in $string$.
+
+      <dt>'StringTake["$string$", {$m$, $n$, $s$}]'
+      <dd>gives characters $m$ through $n$ in steps of $s$.
+
+      <dt>'StringTake[{$s1$, $s2$, ...}, $spec$]'
+      <dd>gives the list of results for each of the $si$.
+    </dl>
+
+    >> StringTake["abcde", 2]
+     = ab
+    >> StringTake["abcde", 0]
+     = #<--#
+    >> StringTake["abcde", -2]
+     = de
+    >> StringTake["abcde", {2}]
+     = b
+    >> StringTake["abcd", {2,3}]
+     = bc
+    >> StringTake["abcdefgh", {1, 5, 2}]
+     = ace
+
+    Take the last 2 characters from several strings:
+    >> StringTake[{"abcdef", "stuv", "xyzw"}, -2]
+     = {ef, uv, zw}
+
+    StringTake also supports standard sequence specifications
+    >> StringTake["abcdef", All]
+     = abcdef
+
+    #> StringTake["abcd", 0] // InputForm
+    = ""
+    #> StringTake["abcd", {3, 2}] // InputForm
+    = ""
+    #> StringTake["", {1, 0}] // InputForm
+    = ""
+
+    #> StringTake["abc", {0, 0}]
+    : Cannot take positions 0 through 0 in "abc".
+    = StringTake[abc, {0, 0}]
+
+    #> StringTake[{2, 4},2]
+     : String or list of strings expected at position 1.
+     = StringTake[{2, 4}, 2]
+
+    #> StringTake["kkkl",Graphics[{}]]
+     : Integer or a list of sequence specifications expected at position 2.
+     = StringTake[kkkl, -Graphics-]
+    """
+
+    messages = {
+        "strse": "String or list of strings expected at position 1.",
+        # FIXME: mseqs should be: Sequence specification (+n, -n, {+n}, {-n}, {m, n}, or {m, n, s}) or a list
+        # of sequence specifications expected at position 2 in
+        "mseqs": "Integer or a list of sequence specifications expected at position 2.",
+        "take": 'Cannot take positions `1` through `2` in "`3`".',
+    }
+
+    def apply(self, string, seqspec, evaluation):
+        "StringTake[string_String, seqspec_]"
+        text = string.get_string_value()
+        if text is None:
+            # Reached when apply_strings passes on a list element that is
+            # not a string.
+            return evaluation.message("StringTake", "strse")
+
+        if not isinstance(seqspec, Integer):
+            seq = convert_seq(seqspec)
+        else:
+            # A bare integer n means the first n characters, or the last
+            # |n| characters if n is negative.
+            count = seqspec.get_int_value()
+            seq = (1, count, 1) if count >= 0 else (count, None, 1)
+        if seq is None:
+            return evaluation.message("StringTake", "mseqs")
+
+        start, stop, step = seq
+        py_slice = python_seq(start, stop, step, len(text))
+        if py_slice is None:
+            # No usable slice could be built for a string of this length.
+            return evaluation.message("StringTake", "take", start, stop, string)
+
+        return String(text[py_slice])
+
+    def apply_strings(self, strings, spec, evaluation):
+        "StringTake[strings__, spec_]"
+        taken = []
+        for element in strings.leaves:
+            piece = self.apply(element, spec, evaluation)
+            if piece is None:
+                return None  # apply has already issued a message
+            taken.append(piece)
+        return Expression("List", *taken)
diff --git a/mathics/builtin/string/patterns.py b/mathics/builtin/string/patterns.py
new file mode 100644
index 000000000..1b94e308c
--- /dev/null
+++ b/mathics/builtin/string/patterns.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+"""
+String Patterns
+"""
+
+import re
+
+from mathics.version import __version__  # noqa used in loading to check consistency.
+
+from mathics.builtin.base import Builtin
+from mathics.core.expression import (
+    Expression,
+    Integer1,
+    SymbolFalse,
+    SymbolTrue,
+    )
+
+
+from mathics.builtin.strings import (
+    anchor_pattern,
+    to_regex,
+)
+
+class StringMatchQ(Builtin):  # builtin testing a string against a string pattern
+    r"""
+    >> StringMatchQ["abc", "abc"]
+     = True
+
+    >> StringMatchQ["abc", "abd"]
+     = False
+
+    >> StringMatchQ["15a94xcZ6", (DigitCharacter | LetterCharacter)..]
+     = True
+
+    #> StringMatchQ["abc1", LetterCharacter]
+     = False
+
+    #> StringMatchQ["abc", "ABC"]
+     = False
+    #> StringMatchQ["abc", "ABC", IgnoreCase -> True]
+     = True
+
+    ## Words containing nonword characters
+    #> StringMatchQ[{"monkey", "don't", "AAA", "S&P"}, ___ ~~ Except[WordCharacter] ~~ ___]
+     = {False, True, False, True}
+
+    ## Try to match a literal number
+    #> StringMatchQ[1.5, NumberString]
+     : String or list of strings expected at position 1 in StringMatchQ[1.5, NumberString].
+     = StringMatchQ[1.5, NumberString]
+
+    Use StringMatchQ as an operator
+    >> StringMatchQ[LetterCharacter]["a"]
+     = True
+
+    ## Abbreviated string patterns Issue #517
+    #> StringMatchQ["abcd", "abc*"]
+     = True
+    #> StringMatchQ["abc", "abc*"]
+     = True
+    #> StringMatchQ["abc\\", "abc\\"]
+     = True
+    #> StringMatchQ["abc*d", "abc\\*d"]
+     = True
+    #> StringMatchQ["abc*d", "abc\\**"]
+     = True
+    #> StringMatchQ["abcde", "a*f"]
+     = False
+
+    #> StringMatchQ["abcde", "a@e"]
+     = True
+    #> StringMatchQ["aBCDe", "a@e"]
+     = False
+    #> StringMatchQ["ae", "a@e"]
+     = False
+    """
+
+    attributes = ("Listable",)  # see the list-of-strings doctest above
+
+    options = {
+        "IgnoreCase": "False",
+        "SpellingCorrections": "None",  # declared only; apply() below never reads it
+    }
+
+    messages = {
+        "strse": "String or list of strings expected at position `1` in `2`.",
+    }
+
+    rules = {
+        "StringMatchQ[patt_][expr_]": "StringMatchQ[expr, patt]",  # operator form
+    }
+
+    def apply(self, string, patt, evaluation, options):
+        "StringMatchQ[string_, patt_, OptionsPattern[%(name)s]]"
+        py_string = string.get_string_value()
+        if py_string is None:  # first argument has no string value: StringMatchQ::strse
+            return evaluation.message(
+                "StringMatchQ",
+                "strse",
+                Integer1,
+                Expression("StringMatchQ", string, patt),
+            )
+
+        re_patt = to_regex(patt, evaluation, abbreviated_patterns=True)
+        if re_patt is None:  # no regex produced for patt: StringExpression::invld
+            return evaluation.message(
+                "StringExpression", "invld", patt, Expression("StringExpression", patt)
+            )
+
+        re_patt = anchor_pattern(re_patt)  # NOTE(review): presumably forces a whole-string match - confirm in strings.py
+
+        flags = re.MULTILINE  # base flags; IGNORECASE is OR-ed in below on request
+        if options["System`IgnoreCase"] == SymbolTrue:
+            flags = flags | re.IGNORECASE
+
+        if re.match(re_patt, py_string, flags=flags) is None:  # no match object -> False
+            return SymbolFalse
+        else:
+            return SymbolTrue
diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py
index beef742fa..ff34b1a38 100644
--- a/mathics/builtin/strings.py
+++ b/mathics/builtin/strings.py
@@ -1,13 +1,11 @@
 # -*- coding: utf-8 -*-
-
 """
-Strings and Characters
+Unsorted Strings and Characters
 """
 
 import io
 import re
 import sys
-from sys import version_info
 import unicodedata
 from binascii import hexlify, unhexlify
 from heapq import heappush, heappop
@@ -33,11 +31,9 @@
     Integer,
     Integer0,
     Integer1,
-    from_python,
     string_list,
 )
 from mathics.core.parser import MathicsFileLineFeeder, parse
-from mathics.builtin.lists import python_seq, convert_seq
 from mathics.settings import SYSTEM_CHARACTER_ENCODING
 from mathics_scanner import TranslateError
 
@@ -295,24 +291,11 @@ def mathics_split(patt, string, flags):
     return [string[start:stop] for start, stop in indices]
 
 
-if version_info >= (3, 0):
-
-    def pack_bytes(codes):
-        return bytes(codes)
-
-    def unpack_bytes(codes):
-        return [int(code) for code in codes]
-
-
-else:
-    from struct import pack, unpack
-
-    def pack_bytes(codes):
-        return pack("B" * len(codes), *codes)
-
-    def unpack_bytes(codes):
-        return unpack("B" * len(codes), codes)
+def pack_bytes(codes):  # returns bytes(codes); presumably codes is an iterable of ints 0-255 - confirm
+    return bytes(codes)
 
+def unpack_bytes(codes):  # list with int(code) for each element; presumably codes is bytes - confirm
+    return [int(code) for code in codes]
 
 class SystemCharacterEncoding(Predefined):
     """
@@ -833,205 +816,6 @@ class HexidecimalCharacter(Builtin):
     """
 
 
-class DigitQ(Builtin):
-    """
-    <dl>
-    <dt>'DigitQ[$string$]'
-        yields 'True' if all the characters in the $string$ are digits, and yields 'False' otherwise.
-    </dl>
-
-    >> DigitQ["9"]
-     = True
-
-    >> DigitQ["a"]
-     = False
-
-    >> DigitQ["01001101011000010111010001101000011010010110001101110011"]
-     = True
-
-    >> DigitQ["-123456789"]
-     = False
-
-    """
-
-    rules = {
-        "DigitQ[string_]": (
-            "If[StringQ[string], StringMatchQ[string, DigitCharacter...], False, False]"
-        ),
-    }
-
-
-class LetterQ(Builtin):
-    """
-    <dl>
-    <dt>'LetterQ[$string$]'
-        yields 'True' if all the characters in the $string$ are letters, and yields 'False' otherwise.
-    </dl>
-
-    >> LetterQ["m"]
-     = True
-
-    >> LetterQ["9"]
-     = False
-
-    >> LetterQ["Mathics"]
-     = True
-
-    >> LetterQ["Welcome to Mathics"]
-     = False
-
-    #> LetterQ[""]
-     = True
-
-    #> LetterQ["\\[Alpha]\\[Beta]\\[Gamma]\\[Delta]\\[Epsilon]\\[Zeta]\\[Eta]\\[Theta]"]
-     = True
-    """
-
-    rules = {
-        "LetterQ[string_]": (
-            "If[StringQ[string], StringMatchQ[string, LetterCharacter...], False, False]"
-        ),
-    }
-
-
-class StringMatchQ(Builtin):
-    r"""
-    >> StringMatchQ["abc", "abc"]
-     = True
-
-    >> StringMatchQ["abc", "abd"]
-     = False
-
-    >> StringMatchQ["15a94xcZ6", (DigitCharacter | LetterCharacter)..]
-     = True
-
-    #> StringMatchQ["abc1", LetterCharacter]
-     = False
-
-    #> StringMatchQ["abc", "ABC"]
-     = False
-    #> StringMatchQ["abc", "ABC", IgnoreCase -> True]
-     = True
-
-    ## Words containing nonword characters
-    #> StringMatchQ[{"monkey", "don't", "AAA", "S&P"}, ___ ~~ Except[WordCharacter] ~~ ___]
-     = {False, True, False, True}
-
-    ## Try to match a literal number
-    #> StringMatchQ[1.5, NumberString]
-     : String or list of strings expected at position 1 in StringMatchQ[1.5, NumberString].
-     = StringMatchQ[1.5, NumberString]
-
-    Use StringMatchQ as an operator
-    >> StringMatchQ[LetterCharacter]["a"]
-     = True
-
-    ## Abbreviated string patterns Issue #517
-    #> StringMatchQ["abcd", "abc*"]
-     = True
-    #> StringMatchQ["abc", "abc*"]
-     = True
-    #> StringMatchQ["abc\\", "abc\\"]
-     = True
-    #> StringMatchQ["abc*d", "abc\\*d"]
-     = True
-    #> StringMatchQ["abc*d", "abc\\**"]
-     = True
-    #> StringMatchQ["abcde", "a*f"]
-     = False
-
-    #> StringMatchQ["abcde", "a@e"]
-     = True
-    #> StringMatchQ["aBCDe", "a@e"]
-     = False
-    #> StringMatchQ["ae", "a@e"]
-     = False
-    """
-
-    attributes = ("Listable",)
-
-    options = {
-        "IgnoreCase": "False",
-        "SpellingCorrections": "None",
-    }
-
-    messages = {
-        "strse": "String or list of strings expected at position `1` in `2`.",
-    }
-
-    rules = {
-        "StringMatchQ[patt_][expr_]": "StringMatchQ[expr, patt]",
-    }
-
-    def apply(self, string, patt, evaluation, options):
-        "StringMatchQ[string_, patt_, OptionsPattern[%(name)s]]"
-        py_string = string.get_string_value()
-        if py_string is None:
-            return evaluation.message(
-                "StringMatchQ",
-                "strse",
-                Integer1,
-                Expression("StringMatchQ", string, patt),
-            )
-
-        re_patt = to_regex(patt, evaluation, abbreviated_patterns=True)
-        if re_patt is None:
-            return evaluation.message(
-                "StringExpression", "invld", patt, Expression("StringExpression", patt)
-            )
-
-        re_patt = anchor_pattern(re_patt)
-
-        flags = re.MULTILINE
-        if options["System`IgnoreCase"] == SymbolTrue:
-            flags = flags | re.IGNORECASE
-
-        if re.match(re_patt, py_string, flags=flags) is None:
-            return SymbolFalse
-        else:
-            return SymbolTrue
-
-
-class StringJoin(BinaryOperator):
-    """
-    <dl>
-    <dt>'StringJoin["$s1$", "$s2$", ...]'
-        <dd>returns the concatenation of the strings $s1$, $s2$, ….
-    </dl>
-
-    >> StringJoin["a", "b", "c"]
-     = abc
-    >> "a" <> "b" <> "c" // InputForm
-     = "abc"
-
-    'StringJoin' flattens lists out:
-    >> StringJoin[{"a", "b"}] // InputForm
-     = "ab"
-    >> Print[StringJoin[{"Hello", " ", {"world"}}, "!"]]
-     | Hello world!
-    """
-
-    operator = "<>"
-    precedence = 600
-    attributes = ("Flat", "OneIdentity")
-
-    def apply(self, items, evaluation):
-        "StringJoin[items___]"
-
-        result = ""
-        items = items.flatten(SymbolList)
-        if items.get_head_name() == "System`List":
-            items = items.leaves
-        else:
-            items = items.get_sequence()
-        for item in items:
-            if not isinstance(item, String):
-                evaluation.message("StringJoin", "string")
-                return
-            result += item.value
-        return String(result)
-
-
 class StringSplit(Builtin):
     """
     <dl>
@@ -1137,191 +921,6 @@ def apply(self, string, patt, evaluation, options):
         )
 
 
-class StringPosition(Builtin):
-    """
-    <dl>
-    <dt>'StringPosition["$string$", $patt$]'
-      <dd>gives a list of starting and ending positions where $patt$ matches "$string$".
-    <dt>'StringPosition["$string$", $patt$, $n$]'
-      <dd>returns the first $n$ matches only.
-    <dt>'StringPosition["$string$", {$patt1$, $patt2$, ...}, $n$]'
-      <dd>matches multiple patterns.
-    <dt>'StringPosition[{$s1$, $s2$, ...}, $patt$]'
-      <dd>returns a list of matches for multiple strings.
-    </dl>
-
-    >> StringPosition["123ABCxyABCzzzABCABC", "ABC"]
-     = {{4, 6}, {9, 11}, {15, 17}, {18, 20}}
-
-    >> StringPosition["123ABCxyABCzzzABCABC", "ABC", 2]
-     = {{4, 6}, {9, 11}}
-
-    'StringPosition' can be useful for searching through text.
-    >> data = Import["ExampleData/EinsteinSzilLetter.txt"];
-    >> StringPosition[data, "uranium"]
-     = {{299, 305}, {870, 876}, {1538, 1544}, {1671, 1677}, {2300, 2306}, {2784, 2790}, {3093, 3099}}
-
-    #> StringPosition["123ABCxyABCzzzABCABC", "ABC", -1]
-     : Non-negative integer or Infinity expected at position 3 in StringPosition[123ABCxyABCzzzABCABC, ABC, -1].
-     = StringPosition[123ABCxyABCzzzABCABC, ABC, -1]
-
-    ## Overlaps
-    #> StringPosition["1231221312112332", RegularExpression["[12]+"]]
-     = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}}
-    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> False]
-     = {{1, 2}, {4, 7}, {9, 13}, {16, 16}}
-    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> x]
-     = {{1, 2}, {4, 7}, {9, 13}, {16, 16}}
-    #> StringPosition["1231221312112332", RegularExpression["[12]+"], Overlaps -> All]
-     : Overlaps -> All option is not currently implemented in Mathics.
-     = {{1, 2}, {2, 2}, {4, 7}, {5, 7}, {6, 7}, {7, 7}, {9, 13}, {10, 13}, {11, 13}, {12, 13}, {13, 13}, {16, 16}}
-
-    #> StringPosition["21211121122", {"121", "11"}]
-     = {{2, 4}, {4, 5}, {5, 6}, {6, 8}, {8, 9}}
-    #> StringPosition["21211121122", {"121", "11"}, Overlaps -> False]
-     = {{2, 4}, {5, 6}, {8, 9}}
-
-    #> StringPosition[{"abc", "abcda"}, "a"]
-     = {{{1, 1}}, {{1, 1}, {5, 5}}}
-
-    #> StringPosition[{"abc"}, "a", Infinity]
-     = {{{1, 1}}}
-
-    #> StringPosition["abc"]["123AabcDEabc"]
-     = {{5, 7}, {10, 12}}
-    """
-
-    options = {
-        "IgnoreCase": "False",
-        "MetaCharacters": "None",
-        "Overlaps": "True",
-    }
-
-    messages = {
-        "strse": "String or list of strings expected at position `1` in `2`.",
-        "overall": "Overlaps -> All option is not currently implemented in Mathics.",
-        "innf": "Non-negative integer or Infinity expected at position `2` in `1`.",
-    }
-
-    rules = {
-        "StringPosition[patt_][s_]": "StringPosition[s, patt]",
-    }
-
-    def apply(self, string, patt, evaluation, options):
-        "StringPosition[string_, patt_, OptionsPattern[StringPosition]]"
-
-        return self.apply_n(
-            string,
-            patt,
-            Expression("DirectedInfinity", Integer1),
-            evaluation,
-            options,
-        )
-
-    def apply_n(self, string, patt, n, evaluation, options):
-        "StringPosition[string_, patt_, n:(_Integer|DirectedInfinity[1]), OptionsPattern[StringPosition]]"
-
-        expr = Expression("StringPosition", string, patt, n)
-
-        # check n
-        if n.has_form("DirectedInfinity", 1):
-            py_n = float("inf")
-        else:
-            py_n = n.get_int_value()
-            if py_n is None or py_n < 0:
-                return evaluation.message("StringPosition", "innf", expr, Integer(3))
-
-        # check options
-        if options["System`Overlaps"] == SymbolTrue:
-            overlap = True
-        elif options["System`Overlaps"] == SymbolFalse:
-            overlap = False
-        elif options["System`Overlaps"] == Symbol("All"):
-            # TODO
-            evaluation.message("StringPosition", "overall")
-            overlap = True
-        else:
-            overlap = False  # unknown options are teated as False
-
-        # convert patterns
-        if patt.has_form("List", None):
-            patts = patt.get_leaves()
-        else:
-            patts = [patt]
-        re_patts = []
-        for p in patts:
-            py_p = to_regex(p, evaluation)
-            if py_p is None:
-                return evaluation.message("StringExpression", "invld", p, patt)
-            re_patts.append(py_p)
-        compiled_patts = [re.compile(re_patt) for re_patt in re_patts]
-
-        # string or list of strings
-        if string.has_form("List", None):
-            py_strings = [s.get_string_value() for s in string.leaves]
-            if None in py_strings:
-                return
-            results = [
-                self.do_apply(py_string, compiled_patts, py_n, overlap)
-                for py_string in py_strings
-            ]
-            return Expression(SymbolList, *results)
-        else:
-            py_string = string.get_string_value()
-            if py_string is None:
-                return
-            return self.do_apply(py_string, compiled_patts, py_n, overlap)
-
-    @staticmethod
-    def do_apply(py_string, compiled_patts, py_n, overlap):
-        result = []
-        start = 0
-        while start < len(py_string):
-            found_match = False
-            for compiled_patt in compiled_patts:
-                m = compiled_patt.match(py_string, start)
-                if m is None:
-                    continue
-                found_match = True
-                result.append([m.start() + 1, m.end()])  # 0 to 1 based indexing
-                if len(result) == py_n:
-                    return from_python(result)
-                if not overlap:
-                    start = m.end()
-            if overlap or not found_match:
-                start += 1
-        return from_python(result)
-
-
-class StringLength(Builtin):
-    """
-    <dl>
-    <dt>'StringLength["$string$"]'
-        <dd>gives the length of $string$.
-    </dl>
-
-    >> StringLength["abc"]
-     = 3
-    'StringLength' is listable:
-    >> StringLength[{"a", "bc"}]
-     = {1, 2}
-
-    >> StringLength[x]
-     : String expected.
-     = StringLength[x]
-    """
-
-    attributes = ("Listable",)
-
-    def apply(self, str, evaluation):
-        "StringLength[str_]"
-
-        if not isinstance(str, String):
-            evaluation.message("StringLength", "string")
-            return
-        return Integer(len(str.value))
-
-
 class _StringFind(Builtin):
     attributes = "Protected"
 
@@ -1410,139 +1009,6 @@ def convert_rule(r):
             return self._find(py_strings, py_rules, py_n, flags, evaluation)
 
 
-class StringReplace(_StringFind):
-    """
-    <dl>
-    <dt>'StringReplace["$string$", "$a$"->"$b$"]'
-        <dd>replaces each occurrence of $old$ with $new$ in $string$.
-    <dt>'StringReplace["$string$", {"$s1$"->"$sp1$", "$s2$"->"$sp2$"}]'
-        <dd>performs multiple replacements of each $si$ by the
-        corresponding $spi$ in $string$.
-    <dt>'StringReplace["$string$", $srules$, $n$]'
-        <dd>only performs the first $n$ replacements.
-    <dt>'StringReplace[{"$string1$", "$string2$", ...}, $srules$]'
-        <dd>performs the replacements specified by $srules$ on a list
-        of strings.
-    </dl>
-
-    StringReplace replaces all occurrences of one substring with another:
-    >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A"]
-     = AAAyyxxAyA
-
-    Multiple replacements can be supplied:
-    >> StringReplace["xyzwxyzwxxyzxyzw", {"xyz" -> "A", "w" -> "BCD"}]
-     = ABCDABCDxAABCD
-
-    Only replace the first 2 occurences:
-    >> StringReplace["xyxyxyyyxxxyyxy", "xy" -> "A", 2]
-     = AAxyyyxxxyyxy
-
-    Also works for multiple rules:
-    >> StringReplace["abba", {"a" -> "A", "b" -> "B"}, 2]
-     = ABba
-
-    StringReplace acts on lists of strings too:
-    >> StringReplace[{"xyxyxxy", "yxyxyxxxyyxy"}, "xy" -> "A"]
-     = {AAxA, yAAxxAyA}
-
-    #> StringReplace["abcabc", "a" -> "b", Infinity]
-     = bbcbbc
-    #> StringReplace[x, "a" -> "b"]
-     : String or list of strings expected at position 1 in StringReplace[x, a -> b].
-     = StringReplace[x, a -> b]
-    #> StringReplace["xyzwxyzwaxyzxyzw", x]
-     : x is not a valid string replacement rule.
-     = StringReplace[xyzwxyzwaxyzxyzw, x]
-    #> StringReplace["xyzwxyzwaxyzxyzw", x -> y]
-     : Element x is not a valid string or pattern element in x.
-     = StringReplace[xyzwxyzwaxyzxyzw, x -> y]
-    #> StringReplace["abcabc", "a" -> "b", -1]
-     : Non-negative integer or Infinity expected at position 3 in StringReplace[abcabc, a -> b, -1].
-     = StringReplace[abcabc, a -> b, -1]
-    #> StringReplace["abc", "b" -> 4]
-     : String expected.
-     = a <> 4 <> c
-
-    #> StringReplace["01101100010", "01" .. -> "x"]
-     = x1x100x0
-
-    #> StringReplace["abc abcb abdc", "ab" ~~ _ -> "X"]
-     = X Xb Xc
-
-    #> StringReplace["abc abcd abcd",  WordBoundary ~~ "abc" ~~ WordBoundary -> "XX"]
-     = XX abcd abcd
-
-    #> StringReplace["abcd acbd", RegularExpression["[ab]"] -> "XX"]
-     = XXXXcd XXcXXd
-
-    #> StringReplace["abcd acbd", RegularExpression["[ab]"] ~~ _ -> "YY"]
-     = YYcd YYYY
-
-    #> StringReplace["abcdabcdaabcabcd", {"abc" -> "Y", "d" -> "XXX"}]
-     = YXXXYXXXaYYXXX
-
-
-    #> StringReplace["  Have a nice day.  ", (StartOfString ~~ Whitespace) | (Whitespace ~~ EndOfString) -> ""] // FullForm
-     = "Have a nice day."
-
-    #> StringReplace["xyXY", "xy" -> "01"]
-     = 01XY
-    #> StringReplace["xyXY", "xy" -> "01", IgnoreCase -> True]
-     = 0101
-
-    StringReplace also can be used as an operator:
-    >> StringReplace["y" -> "ies"]["city"]
-     = cities
-    """
-
-    # TODO Special Characters
-    """
-    #> StringReplace["product: A \\[CirclePlus] B" , "\\[CirclePlus]" -> "x"]
-     = A x B
-    """
-
-    rules = {
-        "StringReplace[rule_][string_]": "StringReplace[string, rule]",
-    }
-
-    def _find(self, py_stri, py_rules, py_n, flags, evaluation):
-        def cases():
-            k = 0
-            for match, form in _parallel_match(py_stri, py_rules, flags, py_n):
-                start, end = match.span()
-                if start > k:
-                    yield String(py_stri[k:start])
-                yield _evaluate_match(form, match, evaluation)
-                k = end
-            if k < len(py_stri):
-                yield String(py_stri[k:])
-
-        return Expression("StringJoin", *list(cases()))
-
-    def apply(self, string, rule, n, evaluation, options):
-        "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]"
-        # this pattern is a slight hack to get around missing Shortest/Longest.
-        return self._apply(string, rule, n, evaluation, options, False)
-
-
-class StringReverse(Builtin):
-    """
-    <dl>
-      <dt>'StringReverse["$string$"]'
-      <dd>reverses the order of the characters in "string".
-      </dl>
-
-      >> StringReverse["live"]
-       = evil
-    """
-
-    attributes = ("Listable", "Protected")
-
-    def apply(self, string, evaluation):
-        "StringReverse[string_String]"
-        return String(string.get_string_value()[::-1])
-
-
 class StringCases(_StringFind):
     """
     <dl>
@@ -1658,70 +1124,6 @@ def apply_truncated(self, s, n, m, expression, evaluation):
             return String((py_s * py_n)[:py_m])
 
 
-class Characters(Builtin):
-    """
-    <dl>
-    <dt>'Characters["$string$"]'
-        <dd>returns a list of the characters in $string$.
-    </dl>
-
-    >> Characters["abc"]
-     = {a, b, c}
-
-    #> \\.78\\.79\\.7A
-     = xyz
-
-    #> \\:0078\\:0079\\:007A
-     = xyz
-
-    #> \\101\\102\\103\\061\\062\\063
-     = ABC123
-
-    #> \\[Alpha]\\[Beta]\\[Gamma]
-     = \u03B1\u03B2\u03B3
-    """
-
-    attributes = ("Listable",)
-
-    def apply(self, string, evaluation):
-        "Characters[string_String]"
-
-        return Expression(SymbolList, *(String(c) for c in string.value))
-
-
-class CharacterRange(Builtin):
-    """
-    <dl>
-    <dt>'CharacterRange["$a$", "$b$"]'
-        <dd>returns a list of the Unicode characters from $a$ to $b$
-        inclusive.
-    </dl>
-
-    >> CharacterRange["a", "e"]
-     = {a, b, c, d, e}
-    >> CharacterRange["b", "a"]
-     = {}
-    """
-
-    attributes = ("ReadProtected",)
-
-    messages = {
-        "argtype": "Arguments `1` and `2` are not both strings of length 1.",
-    }
-
-    def apply(self, start, stop, evaluation):
-        "CharacterRange[start_String, stop_String]"
-
-        if len(start.value) != 1 or len(stop.value) != 1:
-            evaluation.message("CharacterRange", "argtype", start, stop)
-            return
-        start = ord(start.value[0])
-        stop = ord(stop.value[0])
-        return Expression(
-            "List", *[String(chr(code)) for code in range(start, stop + 1)]
-        )
-
-
 class String_(Builtin):
     """
     <dl>
@@ -1746,80 +1148,6 @@ class String_(Builtin):
     name = "String"
 
 
-class LowerCaseQ(Test):
-    """
-    <dl>
-    <dt>'LowerCaseQ[$s$]'
-        <dd>returns True if $s$ consists wholly of lower case characters.
-    </dl>
-
-    >> LowerCaseQ["abc"]
-     = True
-
-    An empty string returns True.
-    >> LowerCaseQ[""]
-     = True
-    """
-
-    def test(self, s):
-        return isinstance(s, String) and all(c.islower() for c in s.get_string_value())
-
-
-class ToLowerCase(Builtin):
-    """
-    <dl>
-    <dt>'ToLowerCase[$s$]'
-        <dd>returns $s$ in all lower case.
-    </dl>
-
-    >> ToLowerCase["New York"]
-     = new york
-    """
-
-    attributes = ("Listable", "Protected")
-
-    def apply(self, s, evaluation):
-        "ToLowerCase[s_String]"
-        return String(s.get_string_value().lower())
-
-
-class UpperCaseQ(Test):
-    """
-    <dl>
-    <dt>'UpperCaseQ[$s$]'
-        <dd>returns True if $s$ consists wholly of upper case characters.
-    </dl>
-
-    >> UpperCaseQ["ABC"]
-     = True
-
-    An empty string returns True.
-    >> UpperCaseQ[""]
-     = True
-    """
-
-    def test(self, s):
-        return isinstance(s, String) and all(c.isupper() for c in s.get_string_value())
-
-
-class ToUpperCase(Builtin):
-    """
-    <dl>
-    <dt>'ToUpperCase[$s$]'
-        <dd>returns $s$ in all upper case.
-    </dl>
-
-    >> ToUpperCase["New York"]
-     = NEW YORK
-    """
-
-    attributes = ("Listable", "Protected")
-
-    def apply(self, s, evaluation):
-        "ToUpperCase[s_String]"
-        return String(s.get_string_value().upper())
-
-
 class ToString(Builtin):
     """
     <dl>
@@ -2287,209 +1615,6 @@ def test(self, expr):
         return isinstance(expr, String)
 
 
-class StringTake(Builtin):
-    """
-    <dl>
-      <dt>'StringTake["$string$", $n$]'
-      <dd>gives the first $n$ characters in $string$.
-
-      <dt>'StringTake["$string$", -$n$]'
-      <dd>gives the last $n$ characters in $string$.
-
-      <dt>'StringTake["$string$", {$n$}]'
-      <dd>gives the $n$th character in $string$.
-
-      <dt>'StringTake["$string$", {$m$, $n$}]'
-      <dd>gives characters $m$ through $n$ in $string$.
-
-      <dt>'StringTake["$string$", {$m$, $n$, $s$}]'
-      <dd>gives characters $m$ through $n$ in steps of $s$.
-
-      <dt>'StringTake[{$s1$, $s2$, ...} $spec$}]'
-      <dd>gives the list of results for each of the $si$.
-    </dl>
-
-    >> StringTake["abcde", 2]
-     = ab
-    >> StringTake["abcde", 0]
-     = #<--#
-    >> StringTake["abcde", -2]
-     = de
-    >> StringTake["abcde", {2}]
-     = b
-    >> StringTake["abcd", {2,3}]
-     = bc
-    >> StringTake["abcdefgh", {1, 5, 2}]
-     = ace
-
-    Take the last 2 characters from several strings:
-    >> StringTake[{"abcdef", "stuv", "xyzw"}, -2]
-     = {ef, uv, zw}
-
-    StringTake also supports standard sequence specifications
-    >> StringTake["abcdef", All]
-     = abcdef
-
-    #> StringTake["abcd", 0] // InputForm
-    = ""
-    #> StringTake["abcd", {3, 2}] // InputForm
-    = ""
-    #> StringTake["", {1, 0}] // InputForm
-    = ""
-
-    #> StringTake["abc", {0, 0}]
-    : Cannot take positions 0 through 0 in "abc".
-    = StringTake[abc, {0, 0}]
-
-    #> StringTake[{2, 4},2]
-     : String or list of strings expected at position 1.
-     = StringTake[{2, 4}, 2]
-
-    #> StringTake["kkkl",Graphics[{}]]
-     : Integer or a list of sequence specifications expected at position 2.
-     = StringTake[kkkl, -Graphics-]
-    """
-
-    messages = {
-        "strse": "String or list of strings expected at position 1.",
-        # FIXME: mseqs should be: Sequence specification (+n, -n, {+n}, {-n}, {m, n}, or {m, n, s}) or a list
-        # of sequence specifications expected at position 2 in
-        "mseqs": "Integer or a list of sequence specifications expected at position 2.",
-        "take": 'Cannot take positions `1` through `2` in "`3`".',
-    }
-
-    def apply(self, string, seqspec, evaluation):
-        "StringTake[string_String, seqspec_]"
-        result = string.get_string_value()
-        if result is None:
-            return evaluation.message("StringTake", "strse")
-
-        if isinstance(seqspec, Integer):
-            pos = seqspec.get_int_value()
-            if pos >= 0:
-                seq = (1, pos, 1)
-            else:
-                seq = (pos, None, 1)
-        else:
-            seq = convert_seq(seqspec)
-
-        if seq is None:
-            return evaluation.message("StringTake", "mseqs")
-
-        start, stop, step = seq
-        py_slice = python_seq(start, stop, step, len(result))
-
-        if py_slice is None:
-            return evaluation.message("StringTake", "take", start, stop, string)
-
-        return String(result[py_slice])
-
-    def apply_strings(self, strings, spec, evaluation):
-        "StringTake[strings__, spec_]"
-        result_list = []
-        for string in strings.leaves:
-            result = self.apply(string, spec, evaluation)
-            if result is None:
-                return None
-            result_list.append(result)
-        return Expression("List", *result_list)
-
-
-class StringDrop(Builtin):
-    """
-    <dl>
-    <dt>'StringDrop["$string$", $n$]'
-        <dd>gives $string$ with the first $n$ characters dropped.
-    <dt>'StringDrop["$string$", -$n$]'
-        <dd>gives $string$ with the last $n$ characters dropped.
-    <dt>'StringDrop["$string$", {$n$}]'
-        <dd>gives $string$ with the $n$th character dropped.
-    <dt>'StringDrop["$string$", {$m$, $n$}]'
-        <dd>gives $string$ with the characters $m$ through $n$ dropped.
-    </dl>
-
-    >> StringDrop["abcde", 2]
-    = cde
-    >> StringDrop["abcde", -2]
-    = abc
-    >> StringDrop["abcde", {2}]
-    = acde
-    >> StringDrop["abcde", {2,3}]
-    = ade
-    >> StringDrop["abcd",{3,2}]
-    = abcd
-    >> StringDrop["abcd",0]
-    = abcd
-    """
-
-    messages = {
-        "strse": "String expected at position 1.",
-        "mseqs": "Integer or list of two Integers are expected at position 2.",
-        "drop": 'Cannot drop positions `1` through `2` in "`3`".',
-    }
-
-    def apply_with_n(self, string, n, evaluation):
-        "StringDrop[string_,n_Integer]"
-        if not isinstance(string, String):
-            return evaluation.message("StringDrop", "strse")
-        if isinstance(n, Integer):
-            pos = n.value
-            if pos > len(string.get_string_value()):
-                return evaluation.message("StringDrop", "drop", 1, pos, string)
-            if pos < -len(string.get_string_value()):
-                return evaluation.message("StringDrop", "drop", pos, -1, string)
-            if pos > 0:
-                return String(string.get_string_value()[pos:])
-            if pos < 0:
-                return String(string.get_string_value()[:(pos)])
-            if pos == 0:
-                return string
-        return evaluation.message("StringDrop", "mseqs")
-
-    def apply_with_ni_nf(self, string, ni, nf, evaluation):
-        "StringDrop[string_,{ni_Integer,nf_Integer}]"
-        if not isinstance(string, String):
-            return evaluation.message("StringDrop", "strse", string)
-
-        if ni.value == 0 or nf.value == 0:
-            return evaluation.message("StringDrop", "drop", ni, nf)
-        fullstring = string.get_string_value()
-        lenfullstring = len(fullstring)
-        posi = ni.value
-        if posi < 0:
-            posi = lenfullstring + posi + 1
-        posf = nf.value
-        if posf < 0:
-            posf = lenfullstring + posf + 1
-        if posf > lenfullstring or posi > lenfullstring or posf <= 0 or posi <= 0:
-            # positions out or range
-            return evaluation.message("StringDrop", "drop", ni, nf, fullstring)
-        if posf < posi:
-            return string  # this is what actually mma does
-        return String(fullstring[: (posi - 1)] + fullstring[posf:])
-
-    def apply_with_ni(self, string, ni, evaluation):
-        "StringDrop[string_,{ni_Integer}]"
-        if not isinstance(string, String):
-            return evaluation.message("StringDrop", "strse", string)
-        if ni.value == 0:
-            return evaluation.message("StringDrop", "drop", ni, ni)
-        fullstring = string.get_string_value()
-        lenfullstring = len(fullstring)
-        posi = ni.value
-        if posi < 0:
-            posi = lenfullstring + posi + 1
-        if posi > lenfullstring or posi <= 0:
-            return evaluation.message("StringDrop", "drop", ni, ni, fullstring)
-        return String(fullstring[: (posi - 1)] + fullstring[posi:])
-
-    def apply(self, string, something, evaluation):
-        "StringDrop[string_,something___]"
-        if not isinstance(string, String):
-            return evaluation.message("StringDrop", "strse")
-        return evaluation.message("StringDrop", "mseqs")
-
-
 class HammingDistance(Builtin):
     """
     <dl>
@@ -2844,189 +1969,6 @@ def apply_pattern(self, s, patt, expression, evaluation):
         return String(text[left:right])
 
 
-class StringInsert(Builtin):
-    """
-    <dl>
-      <dt>'StringInsert["$string$", "$snew$", $n$]'
-      <dd>yields a string with $snew$ inserted starting at position $n$ in $string$.
-
-      <dt>'StringInsert["$string$", "$snew$", -$n$]'
-      <dd>inserts a at position $n$ from the end of "$string$".
-
-      <dt>'StringInsert["$string$", "$snew$", {$n_1$, $n_2$, ...}]'
-      <dd>inserts a copy of $snew$ at each position $n_i$ in $string$;
-        the $n_i$ are taken before any insertion is done.
-
-      <dt>'StringInsert[{$s_1$, $s_2$, ...}, "$snew$", $n$]'
-      <dd>gives the list of resutls for each of the $s_i$.
-    </dl>
-
-    >> StringInsert["noting", "h", 4]
-     = nothing
-
-    #> StringInsert["abcdefghijklm", "X", 15]
-     : Cannot insert at position 15 in abcdefghijklm.
-     = StringInsert[abcdefghijklm, X, 15]
-
-    #> StringInsert[abcdefghijklm, "X", 4]
-     : String or list of strings expected at position 1 in StringInsert[abcdefghijklm, X, 4].
-     = StringInsert[abcdefghijklm, X, 4]
-
-    #> StringInsert["abcdefghijklm", X, 4]
-     : String expected at position 2 in StringInsert[abcdefghijklm, X, 4].
-     = StringInsert[abcdefghijklm, X, 4]
-
-    #> StringInsert["abcdefghijklm", "X", a]
-     : Position specification a in StringInsert[abcdefghijklm, X, a] is not a machine-sized integer or a list of machine-sized integers.
-     = StringInsert[abcdefghijklm, X, a]
-
-    #> StringInsert["abcdefghijklm", "X", 0]
-     : Cannot insert at position 0 in abcdefghijklm.
-     =  StringInsert[abcdefghijklm, X, 0]
-
-    >> StringInsert["note", "d", -1]
-     = noted
-
-    >> StringInsert["here", "t", -5]
-     = there
-
-    #> StringInsert["abcdefghijklm", "X", -15]
-     : Cannot insert at position -15 in abcdefghijklm.
-     = StringInsert[abcdefghijklm, X, -15]
-
-    >> StringInsert["adac", "he", {1, 5}]
-     = headache
-
-    #> StringInsert["abcdefghijklm", "X", {1, -1, 14, -14}]
-     = XXabcdefghijklmXX
-
-    #> StringInsert["abcdefghijklm", "X", {1, 0}]
-     : Cannot insert at position 0 in abcdefghijklm.
-     = StringInsert[abcdefghijklm, X, {1, 0}]
-
-    #> StringInsert["", "X", {1}]
-     = X
-
-    #> StringInsert["", "X", {1, -1}]
-     = XX
-
-    #> StringInsert["", "", {1}]
-     = #<--#
-
-    #> StringInsert["", "X", {1, 2}]
-     : Cannot insert at position 2 in .
-     = StringInsert[, X, {1, 2}]
-
-    #> StringInsert["abcdefghijklm", "", {1, 2, 3, 4 ,5, -6}]
-     = abcdefghijklm
-
-    #> StringInsert["abcdefghijklm", "X", {}]
-     = abcdefghijklm
-
-    >> StringInsert[{"something", "sometimes"}, " ", 5]
-     = {some thing, some times}
-
-    #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", 13]
-     : Cannot insert at position 13 in Mathics.
-     = {abcdefghijklXm, StringInsert[Mathics, X, 13]}
-
-    #> StringInsert[{"", ""}, "", {1, 1, 1, 1}]
-     = {, }
-
-    #> StringInsert[{"abcdefghijklm", "Mathics"}, "X", {0, 2}]
-     : Cannot insert at position 0 in abcdefghijklm.
-     : Cannot insert at position 0 in Mathics.
-     = {StringInsert[abcdefghijklm, X, {0, 2}], StringInsert[Mathics, X, {0, 2}]}
-
-    #> StringInsert[{"abcdefghijklm", Mathics}, "X", {1, 2}]
-     : String or list of strings expected at position 1 in StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}].
-     = StringInsert[{abcdefghijklm, Mathics}, X, {1, 2}]
-
-    #> StringInsert[{"", "Mathics"}, "X", {1, 1, -1}]
-     = {XXX, XXMathicsX}
-
-    >> StringInsert["1234567890123456", ".", Range[-16, -4, 3]]
-     = 1.234.567.890.123.456"""
-
-    messages = {
-        "strse": "String or list of strings expected at position `1` in `2`.",
-        "string": "String expected at position `1` in `2`.",
-        "ins": "Cannot insert at position `1` in `2`.",
-        "psl": "Position specification `1` in `2` is not a machine-sized integer or a list of machine-sized integers.",
-    }
-
-    def _insert(self, str, add, lpos, evaluation):
-        for pos in lpos:
-            if abs(pos) < 1 or abs(pos) > len(str) + 1:
-                evaluation.message("StringInsert", "ins", Integer(pos), String(str))
-                return evaluation.format_output(
-                    Expression(
-                        "StringInsert", str, add, lpos[0] if len(lpos) == 1 else lpos
-                    )
-                )
-
-        # Create new list of position which are rearranged
-        pos_limit = len(str) + 2
-        listpos = [p if p > 0 else pos_limit + p for p in lpos]
-        listpos.sort()
-
-        result = ""
-        start = 0
-        for pos in listpos:
-            stop = pos - 1
-            result += str[start:stop] + add
-            start = stop
-        else:
-            result += str[start : len(str)]
-
-        return result
-
-    def apply(self, strsource, strnew, pos, evaluation):
-        "StringInsert[strsource_, strnew_, pos_]"
-
-        exp = Expression("StringInsert", strsource, strnew, pos)
-
-        py_strnew = strnew.get_string_value()
-        if py_strnew is None:
-            return evaluation.message("StringInsert", "string", Integer(2), exp)
-
-        # Check and create list of position
-        listpos = []
-        if pos.has_form("List", None):
-            leaves = pos.get_leaves()
-            if not leaves:
-                return strsource
-            else:
-                for i, posi in enumerate(leaves):
-                    py_posi = posi.get_int_value()
-                    if py_posi is None:
-                        return evaluation.message("StringInsert", "psl", pos, exp)
-                    listpos.append(py_posi)
-        else:
-            py_pos = pos.get_int_value()
-            if py_pos is None:
-                return evaluation.message("StringInsert", "psl", pos, exp)
-            listpos.append(py_pos)
-
-        # Check and perform the insertion
-        if strsource.has_form("List", None):
-            py_strsource = [sub.get_string_value() for sub in strsource.leaves]
-            if any(sub is None for sub in py_strsource):
-                return evaluation.message("StringInsert", "strse", Integer1, exp)
-            return Expression(
-                "List",
-                *[
-                    String(self._insert(s, py_strnew, listpos, evaluation))
-                    for s in py_strsource
-                ]
-            )
-        else:
-            py_strsource = strsource.get_string_value()
-            if py_strsource is None:
-                return evaluation.message("StringInsert", "strse", Integer1, exp)
-            return String(self._insert(py_strsource, py_strnew, listpos, evaluation))
-
-
 def _pattern_search(name, string, patt, evaluation, options, matched):
     # Get the pattern list and check validity for each
     if patt.has_form("List", None):

From d976d9abbba9f87a5cc7a35502753059747cb4eb Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 29 Jun 2021 21:08:51 -0400
Subject: [PATCH 2/7] Split off Character Codes

---
 mathics/builtin/string/charcodes.py | 272 ++++++++++++++++++++++++++++
 mathics/builtin/strings.py          | 252 --------------------------
 setup.py                            |   1 +
 3 files changed, 273 insertions(+), 252 deletions(-)
 create mode 100644 mathics/builtin/string/charcodes.py

diff --git a/mathics/builtin/string/charcodes.py b/mathics/builtin/string/charcodes.py
new file mode 100644
index 000000000..6010fa831
--- /dev/null
+++ b/mathics/builtin/string/charcodes.py
@@ -0,0 +1,272 @@
+# -*- coding: utf-8 -*-
+"""
+Character Codes
+"""
+
+import sys
+
+from mathics.version import __version__  # noqa used in loading to check consistency.
+
+from mathics.builtin.base import Builtin
+
+from mathics.core.expression import (
+    Expression,
+    Integer,
+    Integer1,
+    String,
+    SymbolList,
+)
+
+from mathics.builtin.strings import _encodings, to_python_encoding
+
+
+def pack_bytes(codes):  # codes: iterable of ints 0..255 (callers mask with 0xFF) -> bytes
+    return bytes(codes)
+
+def unpack_bytes(codes):  # every item of `codes`, in order, converted to int
+    return list(map(int, codes))
+
+class ToCharacterCode(Builtin):
+    """
+    <dl>
+    <dt>'ToCharacterCode["$string$"]'
+      <dd>converts the string to a list of character codes (Unicode
+      codepoints).
+    <dt>'ToCharacterCode[{"$string1$", "$string2$", ...}]'
+      <dd>converts a list of strings to character codes.
+    </dl>
+
+    >> ToCharacterCode["abc"]
+     = {97, 98, 99}
+    >> FromCharacterCode[%]
+     = abc
+
+    >> ToCharacterCode["\\[Alpha]\\[Beta]\\[Gamma]"]
+     = {945, 946, 947}
+
+    >> ToCharacterCode["ä", "UTF8"]
+     = {195, 164}
+
+    >> ToCharacterCode["ä", "ISO8859-1"]
+     = {228}
+
+    >> ToCharacterCode[{"ab", "c"}]
+     = {{97, 98}, {99}}
+
+    #> ToCharacterCode[{"ab"}]
+     = {{97, 98}}
+
+    #> ToCharacterCode[{{"ab"}}]
+     : String or list of strings expected at position 1 in ToCharacterCode[{{ab}}].
+     = ToCharacterCode[{{ab}}]
+
+    >> ToCharacterCode[{"ab", x}]
+     : String or list of strings expected at position 1 in ToCharacterCode[{ab, x}].
+     = ToCharacterCode[{ab, x}]
+
+    >> ListPlot[ToCharacterCode["plot this string"], Filling -> Axis]
+     = -Graphics-
+
+    #> ToCharacterCode[x]
+     : String or list of strings expected at position 1 in ToCharacterCode[x].
+     = ToCharacterCode[x]
+
+    #> ToCharacterCode[""]
+     = {}
+    """
+
+    messages = {
+        "strse": "String or list of strings expected at position `1` in `2`.",
+    }
+
+    def _encode(self, string, encoding, evaluation):
+        """Convert `string` (one string or a list of strings) to character codes.
+
+        Issues a message and returns None if the argument or encoding is invalid.
+        """
+        exp = Expression("ToCharacterCode", string)
+
+        is_list = string.has_form("List", None)
+        if is_list:
+            texts = [leaf.get_string_value() for leaf in string.leaves]
+        else:
+            texts = [string.get_string_value()]
+        if any(text is None for text in texts):
+            evaluation.message("ToCharacterCode", "strse", Integer1, exp)
+            return None
+
+        if encoding == "Unicode":
+            # "Unicode" yields code points directly; no byte encoding is involved.
+            def convert(text):
+                return Expression(SymbolList, *[Integer(ord(char)) for char in text])
+
+        else:
+            py_encoding = to_python_encoding(encoding)
+            if py_encoding is None:
+                evaluation.message("General", "charcode", encoding)
+                return
+
+            def convert(text):
+                byte_values = unpack_bytes(text.encode(py_encoding))
+                return Expression("List", *[Integer(value) for value in byte_values])
+
+        # A list argument maps to a list of code lists; a lone string to one list.
+        code_lists = [convert(text) for text in texts]
+        return Expression(SymbolList, *code_lists) if is_list else code_lists[0]
+
+    def apply_default(self, string, evaluation):
+        "ToCharacterCode[string_]"
+        return self._encode(string, "Unicode", evaluation)
+
+    def apply(self, string, encoding, evaluation):
+        "ToCharacterCode[string_, encoding_String]"
+        return self._encode(string, encoding.get_string_value(), evaluation)
+
+
+class _InvalidCodepointError(ValueError):
+    """Internal signal that a character code was rejected after a message was issued."""
+
+
+class FromCharacterCode(Builtin):
+    """
+    <dl>
+    <dt>'FromCharacterCode[$n$]'
+        <dd>returns the character corresponding to Unicode codepoint $n$.
+    <dt>'FromCharacterCode[{$n1$, $n2$, ...}]'
+        <dd>returns a string with characters corresponding to $n_i$.
+    <dt>'FromCharacterCode[{{$n11$, $n12$, ...}, {$n21$, $n22$, ...}, ...}]'
+        <dd>returns a list of strings.
+    </dl>
+
+    >> FromCharacterCode[100]
+     = d
+
+    >> FromCharacterCode[228, "ISO8859-1"]
+     = ä
+
+    >> FromCharacterCode[{100, 101, 102}]
+     = def
+    >> ToCharacterCode[%]
+     = {100, 101, 102}
+
+    >> FromCharacterCode[{{97, 98, 99}, {100, 101, 102}}]
+     = {abc, def}
+
+    >> ToCharacterCode["abc 123"] // FromCharacterCode
+     = abc 123
+
+    #> #1 == ToCharacterCode[FromCharacterCode[#1]] & [RandomInteger[{0, 65535}, 100]]
+     = True
+
+    #> FromCharacterCode[{}] // InputForm
+     = ""
+
+    #> FromCharacterCode[65536]
+     : A character code, which should be a non-negative integer less than 65536, is expected at position 1 in {65536}.
+     = FromCharacterCode[65536]
+    #> FromCharacterCode[-1]
+     : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[-1].
+     = FromCharacterCode[-1]
+    #> FromCharacterCode[444444444444444444444444444444444444]
+     : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[444444444444444444444444444444444444].
+     = FromCharacterCode[444444444444444444444444444444444444]
+
+    #> FromCharacterCode[{100, 101, -1}]
+     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, -1}.
+     = FromCharacterCode[{100, 101, -1}]
+    #> FromCharacterCode[{100, 101, 65536}]
+     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, 65536}.
+     = FromCharacterCode[{100, 101, 65536}]
+    #> FromCharacterCode[{100, 101, x}]
+     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}.
+     = FromCharacterCode[{100, 101, x}]
+    #> FromCharacterCode[{100, {101}}]
+     : A character code, which should be a non-negative integer less than 65536, is expected at position 2 in {100, {101}}.
+     = FromCharacterCode[{100, {101}}]
+
+    #> FromCharacterCode[{{97, 98, 99}, {100, 101, x}}]
+     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}.
+     = FromCharacterCode[{{97, 98, 99}, {100, 101, x}}]
+    #> FromCharacterCode[{{97, 98, x}, {100, 101, x}}]
+     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {97, 98, x}.
+     = FromCharacterCode[{{97, 98, x}, {100, 101, x}}]
+    """
+
+    messages = {
+        "notunicode": (
+            "A character code, which should be a non-negative integer less "
+            "than 65536, is expected at position `2` in `1`."
+        ),
+        "intnm": (
+            "Non-negative machine-sized integer expected at " "position `2` in `1`."
+        ),
+        "utf8": "The given codes could not be decoded as utf-8.",
+    }
+
+    def _decode(self, n, encoding, evaluation):
+        """Build a string from the code(s) in `n`, interpreted using `encoding`.
+
+        Returns a String (a list of Strings for nested lists), or None after a message.
+        """
+        exp = Expression("FromCharacterCode", n)
+
+        py_encoding = to_python_encoding(encoding)
+        if py_encoding is None:
+            evaluation.message("General", "charcode", encoding)
+            return
+
+        def convert_codepoint_list(l):
+            # Every element is checked before conversion so that a non-integer
+            # cannot reach the byte masking; only "Unicode" restricts the range.
+            is_unicode = encoding == "Unicode"
+            codes = []
+            for i, ni in enumerate(l):
+                pyni = ni.get_int_value()
+                if pyni is None or (is_unicode and not 0 <= pyni <= 0xFFFF):
+                    evaluation.message(
+                        "FromCharacterCode",
+                        "notunicode",
+                        Expression(SymbolList, *l),
+                        Integer(i + 1),
+                    )
+                    raise _InvalidCodepointError
+                codes.append(pyni)
+            if is_unicode:
+                return "".join(map(chr, codes))
+            return pack_bytes([code & 0xFF for code in codes]).decode(py_encoding)
+
+        try:
+            if not n.has_form("List", None):
+                pyn = n.get_int_value()
+                # Zero is non-negative, so it is accepted like any other code.
+                if not (isinstance(pyn, int) and 0 <= pyn < sys.maxsize):
+                    return evaluation.message(
+                        "FromCharacterCode", "intnm", exp, Integer1
+                    )
+                return String(convert_codepoint_list([n]))
+            leaves = n.get_leaves()
+            if not leaves:
+                return String("")
+            # Mathematica accepts FromCharacterCode[{{100}, 101}],
+            # so to match this, just check the first leaf to see
+            # if we're dealing with nested lists.
+            if not leaves[0].has_form("List", None):
+                return String(convert_codepoint_list(leaves))
+            strings = []
+            for leaf in leaves:
+                codes = leaf.get_leaves() if leaf.has_form("List", None) else [leaf]
+                strings.append(String(convert_codepoint_list(codes)))
+            return Expression(SymbolList, *strings)
+        except _InvalidCodepointError:
+            return
+        except UnicodeDecodeError:
+            evaluation.message(self.get_name(), "utf8")
+            return
+
+    def apply_default(self, n, evaluation):
+        "FromCharacterCode[n_]"
+        return self._decode(n, "Unicode", evaluation)
+
+    def apply(self, n, encoding, evaluation):
+        "FromCharacterCode[n_, encoding_String]"
+        return self._decode(n, encoding.get_string_value(), evaluation)
diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py
index ff34b1a38..78a5ddfd2 100644
--- a/mathics/builtin/strings.py
+++ b/mathics/builtin/strings.py
@@ -291,12 +291,6 @@ def mathics_split(patt, string, flags):
     return [string[start:stop] for start, stop in indices]
 
 
-def pack_bytes(codes):
-    return bytes(codes)
-
-def unpack_bytes(codes):
-    return [int(code) for code in codes]
-
 class SystemCharacterEncoding(Predefined):
     """
     <dl>
@@ -1350,252 +1344,6 @@ def apply_empty(self, evaluation):
         return
 
 
-class ToCharacterCode(Builtin):
-    """
-    <dl>
-    <dt>'ToCharacterCode["$string$"]'
-      <dd>converts the string to a list of character codes (Unicode
-      codepoints).
-    <dt>'ToCharacterCode[{"$string1$", "$string2$", ...}]'
-      <dd>converts a list of strings to character codes.
-    </dl>
-
-    >> ToCharacterCode["abc"]
-     = {97, 98, 99}
-    >> FromCharacterCode[%]
-     = abc
-
-    >> ToCharacterCode["\\[Alpha]\\[Beta]\\[Gamma]"]
-     = {945, 946, 947}
-
-    >> ToCharacterCode["ä", "UTF8"]
-     = {195, 164}
-
-    >> ToCharacterCode["ä", "ISO8859-1"]
-     = {228}
-
-    >> ToCharacterCode[{"ab", "c"}]
-     = {{97, 98}, {99}}
-
-    #> ToCharacterCode[{"ab"}]
-     = {{97, 98}}
-
-    #> ToCharacterCode[{{"ab"}}]
-     : String or list of strings expected at position 1 in ToCharacterCode[{{ab}}].
-     = ToCharacterCode[{{ab}}]
-
-    >> ToCharacterCode[{"ab", x}]
-     : String or list of strings expected at position 1 in ToCharacterCode[{ab, x}].
-     = ToCharacterCode[{ab, x}]
-
-    >> ListPlot[ToCharacterCode["plot this string"], Filling -> Axis]
-     = -Graphics-
-
-    #> ToCharacterCode[x]
-     : String or list of strings expected at position 1 in ToCharacterCode[x].
-     = ToCharacterCode[x]
-
-    #> ToCharacterCode[""]
-     = {}
-    """
-
-    messages = {
-        "strse": "String or list of strings expected at position `1` in `2`.",
-    }
-
-    def _encode(self, string, encoding, evaluation):
-        exp = Expression("ToCharacterCode", string)
-
-        if string.has_form("List", None):
-            string = [substring.get_string_value() for substring in string.leaves]
-            if any(substring is None for substring in string):
-                evaluation.message("ToCharacterCode", "strse", Integer1, exp)
-                return None
-        else:
-            string = string.get_string_value()
-            if string is None:
-                evaluation.message("ToCharacterCode", "strse", Integer1, exp)
-                return None
-
-        if encoding == "Unicode":
-
-            def convert(s):
-                return Expression(SymbolList, *[Integer(ord(code)) for code in s])
-
-        else:
-            py_encoding = to_python_encoding(encoding)
-            if py_encoding is None:
-                evaluation.message("General", "charcode", encoding)
-                return
-
-            def convert(s):
-                return Expression(
-                    "List", *[Integer(x) for x in unpack_bytes(s.encode(py_encoding))]
-                )
-
-        if isinstance(string, list):
-            return Expression(SymbolList, *[convert(substring) for substring in string])
-        elif isinstance(string, str):
-            return convert(string)
-
-    def apply_default(self, string, evaluation):
-        "ToCharacterCode[string_]"
-        return self._encode(string, "Unicode", evaluation)
-
-    def apply(self, string, encoding, evaluation):
-        "ToCharacterCode[string_, encoding_String]"
-        return self._encode(string, encoding.get_string_value(), evaluation)
-
-
-class _InvalidCodepointError(ValueError):
-    pass
-
-
-class FromCharacterCode(Builtin):
-    """
-    <dl>
-    <dt>'FromCharacterCode[$n$]'
-        <dd>returns the character corresponding to Unicode codepoint $n$.
-    <dt>'FromCharacterCode[{$n1$, $n2$, ...}]'
-        <dd>returns a string with characters corresponding to $n_i$.
-    <dt>'FromCharacterCode[{{$n11$, $n12$, ...}, {$n21$, $n22$, ...}, ...}]'
-        <dd>returns a list of strings.
-    </dl>
-
-    >> FromCharacterCode[100]
-     = d
-
-    >> FromCharacterCode[228, "ISO8859-1"]
-     = ä
-
-    >> FromCharacterCode[{100, 101, 102}]
-     = def
-    >> ToCharacterCode[%]
-     = {100, 101, 102}
-
-    >> FromCharacterCode[{{97, 98, 99}, {100, 101, 102}}]
-     = {abc, def}
-
-    >> ToCharacterCode["abc 123"] // FromCharacterCode
-     = abc 123
-
-    #> #1 == ToCharacterCode[FromCharacterCode[#1]] & [RandomInteger[{0, 65535}, 100]]
-     = True
-
-    #> FromCharacterCode[{}] // InputForm
-     = ""
-
-    #> FromCharacterCode[65536]
-     : A character code, which should be a non-negative integer less than 65536, is expected at position 1 in {65536}.
-     = FromCharacterCode[65536]
-    #> FromCharacterCode[-1]
-     : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[-1].
-     = FromCharacterCode[-1]
-    #> FromCharacterCode[444444444444444444444444444444444444]
-     : Non-negative machine-sized integer expected at position 1 in FromCharacterCode[444444444444444444444444444444444444].
-     = FromCharacterCode[444444444444444444444444444444444444]
-
-    #> FromCharacterCode[{100, 101, -1}]
-     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, -1}.
-     = FromCharacterCode[{100, 101, -1}]
-    #> FromCharacterCode[{100, 101, 65536}]
-     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, 65536}.
-     = FromCharacterCode[{100, 101, 65536}]
-    #> FromCharacterCode[{100, 101, x}]
-     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}.
-     = FromCharacterCode[{100, 101, x}]
-    #> FromCharacterCode[{100, {101}}]
-     : A character code, which should be a non-negative integer less than 65536, is expected at position 2 in {100, {101}}.
-     = FromCharacterCode[{100, {101}}]
-
-    #> FromCharacterCode[{{97, 98, 99}, {100, 101, x}}]
-     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {100, 101, x}.
-     = FromCharacterCode[{{97, 98, 99}, {100, 101, x}}]
-    #> FromCharacterCode[{{97, 98, x}, {100, 101, x}}]
-     : A character code, which should be a non-negative integer less than 65536, is expected at position 3 in {97, 98, x}.
-     = FromCharacterCode[{{97, 98, x}, {100, 101, x}}]
-    """
-
-    messages = {
-        "notunicode": (
-            "A character code, which should be a non-negative integer less "
-            "than 65536, is expected at position `2` in `1`."
-        ),
-        "intnm": (
-            "Non-negative machine-sized integer expected at " "position `2` in `1`."
-        ),
-        "utf8": "The given codes could not be decoded as utf-8.",
-    }
-
-    def _decode(self, n, encoding, evaluation):
-        exp = Expression("FromCharacterCode", n)
-
-        py_encoding = to_python_encoding(encoding)
-        if py_encoding is None:
-            evaluation.message("General", "charcode", encoding)
-            return
-
-        def convert_codepoint_list(l):
-            if encoding == "Unicode":
-                s = ""
-                for i, ni in enumerate(l):
-                    pyni = ni.get_int_value()
-                    if not (pyni is not None and 0 <= pyni <= 0xFFFF):
-                        evaluation.message(
-                            "FromCharacterCode",
-                            "notunicode",
-                            Expression(SymbolList, *l),
-                            Integer(i + 1),
-                        )
-                        raise _InvalidCodepointError
-                    s += chr(pyni)
-                return s
-            else:
-                codes = [x.get_int_value() & 0xFF for x in l]
-                return pack_bytes(codes).decode(py_encoding)
-
-        try:
-            if n.has_form("List", None):
-                if not n.get_leaves():
-                    return String("")
-                # Mathematica accepts FromCharacterCode[{{100}, 101}],
-                # so to match this, just check the first leaf to see
-                # if we're dealing with nested lists.
-                elif n.get_leaves()[0].has_form("List", None):
-                    list_of_strings = []
-                    for leaf in n.get_leaves():
-                        if leaf.has_form("List", None):
-                            stringi = convert_codepoint_list(leaf.get_leaves())
-                        else:
-                            stringi = convert_codepoint_list([leaf])
-                        list_of_strings.append(String(stringi))
-                    return Expression(SymbolList, *list_of_strings)
-                else:
-                    return String(convert_codepoint_list(n.get_leaves()))
-            else:
-                pyn = n.get_int_value()
-                if not (isinstance(pyn, int) and pyn > 0 and pyn < sys.maxsize):
-                    return evaluation.message(
-                        "FromCharacterCode", "intnm", exp, Integer1
-                    )
-                return String(convert_codepoint_list([n]))
-        except _InvalidCodepointError:
-            return
-        except UnicodeDecodeError:
-            evaluation.message(self.get_name(), "utf8")
-            return
-
-        assert False, "can't get here"
-
-    def apply_default(self, n, evaluation):
-        "FromCharacterCode[n_]"
-        return self._decode(n, "Unicode", evaluation)
-
-    def apply(self, n, encoding, evaluation):
-        "FromCharacterCode[n_, encoding_String]"
-        return self._decode(n, encoding.get_string_value(), evaluation)
-
-
 class StringQ(Test):
     """
     <dl>
diff --git a/setup.py b/setup.py
index 6c478c359..3e2a18923 100644
--- a/setup.py
+++ b/setup.py
@@ -141,6 +141,7 @@ def subdirs(root, file="*.*", depth=10):
         "mathics.builtin.pymimesniffer",
         "mathics.builtin.pympler",
         "mathics.builtin.specialfns",
+        "mathics.builtin.string",
         "mathics.doc",
         "mathics.format",
     ],

From d96a56fdac26f8d4a9890d6b2e1b4c7e67f9228a Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 29 Jun 2021 21:43:44 -0400
Subject: [PATCH 3/7] Revise strings; add regexp

---
 mathics/builtin/string/operations.py | 288 ++++++++++++-
 mathics/builtin/string/patterns.py   | 280 ++++++++++++-
 mathics/builtin/string/regexp.py     |  32 ++
 mathics/builtin/strings.py           | 582 ---------------------------
 4 files changed, 595 insertions(+), 587 deletions(-)
 create mode 100644 mathics/builtin/string/regexp.py

diff --git a/mathics/builtin/string/operations.py b/mathics/builtin/string/operations.py
index 9a68b2bd9..b07db6247 100644
--- a/mathics/builtin/string/operations.py
+++ b/mathics/builtin/string/operations.py
@@ -17,14 +17,15 @@
 )
 from mathics.core.expression import (
     Expression,
+    Integer,
+    Integer1,
+    String,
     Symbol,
     SymbolFalse,
-    SymbolTrue,
     SymbolList,
-    String,
-    Integer,
-    Integer1,
+    SymbolTrue,
     from_python,
+    string_list,
 )
 from mathics.builtin.lists import python_seq, convert_seq
 from mathics.builtin.strings import (
@@ -673,6 +674,239 @@ def apply(self, string, evaluation):
         return String(string.get_string_value()[::-1])
 
 
+class StringRiffle(Builtin):
+    """
+    <dl>
+    <dt>'StringRiffle[{s1, s2, s3, ...}]'
+      <dd>returns a new string by concatenating all the $si$, with spaces inserted between them.
+    <dt>'StringRiffle[list, sep]'
+      <dd>inserts the separator $sep$ between all elements in $list$.
+    <dt>'StringRiffle[list, {"left", "sep", "right"}]'
+      <dd>use $left$ and $right$ as delimiters after concatenation.
+
+    ## These 2 forms are not currently implemented
+    ## <dt>'StringRiffle[{{s11, s12, ...}, {s21, s22, ...}, ...}]'
+    ##   <dd>returns a new string by concatenating the $sij$, and inserting spaces at the lowest level and newlines at the higher level.
+    ## <dt>'StringRiffle[list, sep1, sep2, ...]'
+    ##   <dd>inserts separator $sepi$ between elements of list at level i.
+    </dl>
+
+    >> StringRiffle[{"a", "b", "c", "d", "e"}]
+     = a b c d e
+
+    #> StringRiffle[{a, b, c, "d", e, "f"}]
+     = a b c d e f
+
+    ## 1st is not a list
+    #> StringRiffle["abcdef"]
+     : List expected at position 1 in StringRiffle[abcdef].
+     : StringRiffle called with 1 argument; 2 or more arguments are expected.
+     = StringRiffle[abcdef]
+
+    #> StringRiffle[{"", "", ""}] // FullForm
+     = "  "
+
+    ## This form is not supported
+    #> StringRiffle[{{"a", "b"}, {"c", "d"}}]
+     : Sublist form in position 1 is is not implemented yet.
+     = StringRiffle[{{a, b}, {c, d}}]
+
+    >> StringRiffle[{"a", "b", "c", "d", "e"}, ", "]
+     = a, b, c, d, e
+
+    #> StringRiffle[{"a", "b", "c", "d", "e"}, sep]
+     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, sep].
+     = StringRiffle[{a, b, c, d, e}, sep]
+
+    >> StringRiffle[{"a", "b", "c", "d", "e"}, {"(", " ", ")"}]
+     = (a b c d e)
+
+    #> StringRiffle[{"a", "b", "c", "d", "e"}, {" ", ")"}]
+     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, { , )}].
+     = StringRiffle[{a, b, c, d, e}, { , )}]
+    #> StringRiffle[{"a", "b", "c", "d", "e"}, {left, " ", "."}]
+     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, {left,  , .}].
+     = StringRiffle[{a, b, c, d, e}, {left,  , .}]
+
+    ## This form is not supported
+    #> StringRiffle[{"a", "b", "c"}, "+", "-"]
+    ## Mathematica result: a+b+c, but we do not support multiple separators
+     :  Multiple separators form is not implemented yet.
+     = StringRiffle[{a, b, c}, +, -]
+    """
+
+    attributes = ("ReadProtected",)
+
+    messages = {
+        "list": "List expected at position `1` in `2`.",
+        "argmu": "StringRiffle called with 1 argument; 2 or more arguments are expected.",
+        "argm": "StringRiffle called with 0 arguments; 2 or more arguments are expected.",
+        "string": "String expected at position `1` in `2`.",
+        "sublist": "Sublist form in position 1 is is not implemented yet.",
+        "mulsep": "Multiple separators form is not implemented yet.",
+    }
+
+    def apply(self, liststr, seps, evaluation):
+        "StringRiffle[liststr_, seps___]"
+        separators = seps.get_sequence()
+        exp = (
+            Expression("StringRiffle", liststr, seps)
+            if separators
+            else Expression("StringRiffle", liststr)
+        )
+
+        # Validate separators
+        if len(separators) > 1:
+            return evaluation.message("StringRiffle", "mulsep")
+        elif len(separators) == 1:
+            if separators[0].has_form("List", None):
+                if len(separators[0].leaves) != 3 or any(
+                    not isinstance(s, String) for s in separators[0].leaves
+                ):
+                    return evaluation.message("StringRiffle", "string", Integer(2), exp)
+            elif not isinstance(separators[0], String):
+                return evaluation.message("StringRiffle", "string", Integer(2), exp)
+
+        # Validate list of string
+        if not liststr.has_form("List", None):
+            evaluation.message("StringRiffle", "list", Integer1, exp)
+            return evaluation.message("StringRiffle", "argmu", exp)
+        elif any(leaf.has_form("List", None) for leaf in liststr.leaves):
+            return evaluation.message("StringRiffle", "sublist")
+
+        # Determine the separation token
+        left, right = "", ""
+        if len(separators) == 0:
+            sep = " "
+        else:
+            if separators[0].has_form("List", None):
+                left = separators[0].leaves[0].value
+                sep = separators[0].leaves[1].value
+                right = separators[0].leaves[2].value
+            else:
+                sep = separators[0].get_string_value()
+
+        # Getting all together
+        result = left
+        for i in range(len(liststr.leaves)):
+            text = (
+                liststr.leaves[i]
+                .format(evaluation, "System`OutputForm")
+                .boxes_to_text(evaluation=evaluation)
+            )
+            if i == len(liststr.leaves) - 1:
+                result += text + right
+            else:
+                result += text + sep
+
+        return String(result)
+
+
+class StringSplit(Builtin):
+    """
+    <dl>
+    <dt>'StringSplit["$s$"]'
+        <dd>splits the string $s$ at whitespace, discarding the
+        whitespace and returning a list of strings.
+    <dt>'StringSplit["$s$", "$d$"]'
+        <dd>splits $s$ at the delimiter $d$.
+    <dt>'StringSplit[$s$, {"$d1$", "$d2$", ...}]'
+        <dd>splits $s$ using multiple delimiters.
+    <dt>'StringSplit[{$s_1$, $s_2$, ...}, {"$d1$", "$d2$", ...}]'
+        <dd>returns a list with the result of applying the function to
+            each element.
+    </dl>
+
+    >> StringSplit["abc,123", ","]
+     = {abc, 123}
+
+    >> StringSplit["abc 123"]
+     = {abc, 123}
+
+    #> StringSplit["  abc    123  "]
+     = {abc, 123}
+
+    >> StringSplit["abc,123.456", {",", "."}]
+     = {abc, 123, 456}
+
+    >> StringSplit["a  b    c", RegularExpression[" +"]]
+     = {a, b, c}
+
+    >> StringSplit[{"a  b", "c  d"}, RegularExpression[" +"]]
+     = {{a, b}, {c, d}}
+
+    #> StringSplit["x", "x"]
+     = {}
+
+    #> StringSplit[x]
+     : String or list of strings expected at position 1 in StringSplit[x].
+     = StringSplit[x, Whitespace]
+
+    #> StringSplit["x", x]
+     : Element x is not a valid string or pattern element in x.
+     = StringSplit[x, x]
+
+    #> StringSplit["12312123", "12"..]
+     = {3, 3}
+
+    #> StringSplit["abaBa", "b"]
+     = {a, aBa}
+    #> StringSplit["abaBa", "b", IgnoreCase -> True]
+     = {a, a, a}
+    """
+
+    rules = {
+        "StringSplit[s_]": "StringSplit[s, Whitespace]",
+    }
+
+    options = {
+        "IgnoreCase": "False",
+        "MetaCharacters": "None",
+    }
+
+    messages = {
+        "strse": "String or list of strings expected at position `1` in `2`.",
+        "pysplit": "As of Python 3.5 re.split does not handle empty pattern matches.",
+    }
+
+    def apply(self, string, patt, evaluation, options):
+        "StringSplit[string_, patt_, OptionsPattern[%(name)s]]"
+
+        if string.get_head_name() == "System`List":
+            leaves = [self.apply(s, patt, evaluation, options) for s in string._leaves]
+            return Expression(SymbolList, *leaves)
+
+        py_string = string.get_string_value()
+
+        if py_string is None:
+            return evaluation.message(
+                "StringSplit", "strse", Integer1, Expression("StringSplit", string)
+            )
+
+        if patt.has_form("List", None):
+            patts = patt.get_leaves()
+        else:
+            patts = [patt]
+        re_patts = []
+        for p in patts:
+            py_p = to_regex(p, evaluation)
+            if py_p is None:
+                return evaluation.message("StringExpression", "invld", p, patt)
+            re_patts.append(py_p)
+
+        flags = re.MULTILINE
+        if options["System`IgnoreCase"] == SymbolTrue:
+            flags = flags | re.IGNORECASE
+
+        result = [py_string]
+        for re_patt in re_patts:
+            result = [t for s in result for t in mathics_split(re_patt, s, flags=flags)]
+
+        return string_list(
+            SymbolList, [String(x) for x in result if x != ""], evaluation
+        )
+
+
 class StringTake(Builtin):
     """
     <dl>
@@ -779,3 +1013,49 @@ def apply_strings(self, strings, spec, evaluation):
                 return None
             result_list.append(result)
         return Expression("List", *result_list)
+
+class StringTrim(Builtin):
+    """
+    <dl>
+    <dt>'StringTrim[$s$]'
+        <dd>returns a version of $s$ with whitespace removed from start and end.
+    </dl>
+
+    >> StringJoin["a", StringTrim["  \\tb\\n "], "c"]
+     = abc
+
+    >> StringTrim["ababaxababyaabab", RegularExpression["(ab)+"]]
+     = axababya
+    """
+
+    def apply(self, s, evaluation):
+        "StringTrim[s_String]"
+        return String(s.get_string_value().strip(" \t\n"))
+
+    def apply_pattern(self, s, patt, expression, evaluation):
+        "StringTrim[s_String, patt_]"
+        text = s.get_string_value()
+        if not text:
+            return s
+
+        py_patt = to_regex(patt, evaluation)
+        if py_patt is None:
+            return evaluation.message("StringExpression", "invld", patt, expression)
+
+        if not py_patt.startswith(r"\A"):
+            left_patt = r"\A" + py_patt
+        else:
+            left_patt = py_patt
+
+        if not py_patt.endswith(r"\Z"):
+            right_patt = py_patt + r"\Z"
+        else:
+            right_patt = py_patt
+
+        m = re.search(left_patt, text)
+        left = m.end(0) if m else 0
+
+        m = re.search(right_patt, text)
+        right = m.start(0) if m else len(text)
+
+        return String(text[left:right])
diff --git a/mathics/builtin/string/patterns.py b/mathics/builtin/string/patterns.py
index 1b94e308c..10a1c0468 100644
--- a/mathics/builtin/string/patterns.py
+++ b/mathics/builtin/string/patterns.py
@@ -7,7 +7,11 @@
 
 from mathics.version import __version__  # noqa used in loading to check consistency.
 
-from mathics.builtin.base import Builtin
+from mathics.builtin.base import (
+    BinaryOperator,
+    Builtin
+)
+
 from mathics.core.expression import (
     Expression,
     Integer1,
@@ -17,10 +21,245 @@
 
 
 from mathics.builtin.strings import (
+    _StringFind,
     anchor_pattern,
     to_regex,
 )
 
+class DigitCharacter(Builtin):
+    """
+    <dl>
+    <dt>'DigitCharacter'
+      <dd>represents the digits 0-9.
+    </dl>
+
+    >> StringMatchQ["1", DigitCharacter]
+     = True
+    >> StringMatchQ["a", DigitCharacter]
+     = False
+    >> StringMatchQ["12", DigitCharacter]
+     = False
+
+    >> StringMatchQ["123245", DigitCharacter..]
+     = True
+
+    #> StringMatchQ["123245a6", DigitCharacter..]
+     = False
+    """
+
+
+class LetterCharacter(Builtin):
+    """
+    <dl>
+    <dt>'LetterCharacter'
+      <dd>represents letters.
+    </dl>
+
+    >> StringMatchQ[#, LetterCharacter] & /@ {"a", "1", "A", " ", "."}
+     = {True, False, True, False, False}
+
+    LetterCharacter also matches unicode characters.
+    >> StringMatchQ["\\[Lambda]", LetterCharacter]
+     = True
+    """
+
+
+class StringCases(_StringFind):
+    """
+    <dl>
+    <dt>'StringCases["$string$", $pattern$]'
+        <dd>gives all occurrences of $pattern$ in $string$.
+    <dt>'StringCases["$string$", $pattern$ -> $form$]'
+        <dd>gives all instances of $form$ that stem from occurrences of $pattern$ in $string$.
+    <dt>'StringCases["$string$", {$pattern1$, $pattern2$, ...}]'
+        <dd>gives all occurrences of $pattern1$, $pattern2$, ....
+    <dt>'StringCases["$string$", $pattern$, $n$]'
+        <dd>gives only the first $n$ occurrences.
+    <dt>'StringCases[{"$string1$", "$string2$", ...}, $pattern$]'
+        <dd>gives occurrences in $string1$, $string2$, ...
+    </dl>
+
+    >> StringCases["axbaxxb", "a" ~~ x_ ~~ "b"]
+     = {axb}
+
+    >> StringCases["axbaxxb", "a" ~~ x__ ~~ "b"]
+     = {axbaxxb}
+
+    >> StringCases["axbaxxb", Shortest["a" ~~ x__ ~~ "b"]]
+     = {axb, axxb}
+
+    >> StringCases["-abc- def -uvw- xyz", Shortest["-" ~~ x__ ~~ "-"] -> x]
+     = {abc, uvw}
+
+    >> StringCases["-öhi- -abc- -.-", "-" ~~ x : WordCharacter .. ~~ "-" -> x]
+     = {öhi, abc}
+
+    >> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x_] -> x]
+     = {abc}
+
+    #> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x : LetterCharacter] -> x]
+     : Ignored restriction given for x in x : LetterCharacter as it does not match previous occurences of x.
+     = {abc}
+
+    >> StringCases["abba", {"a" -> 10, "b" -> 20}, 2]
+     = {10, 20}
+
+    >> StringCases["a#ä_123", WordCharacter]
+     = {a, ä, 1, 2, 3}
+
+    >> StringCases["a#ä_123", LetterCharacter]
+     = {a, ä}
+    """
+
+    rules = {
+        "StringCases[rule_][string_]": "StringCases[string, rule]",
+    }
+
+    def _find(self, py_stri, py_rules, py_n, flags, evaluation):
+        def cases():
+            for match, form in _parallel_match(py_stri, py_rules, flags, py_n):
+                if form is None:
+                    yield String(match.group(0))
+                else:
+                    yield _evaluate_match(form, match, evaluation)
+
+        return Expression(SymbolList, *list(cases()))
+
+    def apply(self, string, rule, n, evaluation, options):
+        "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]"
+        # this pattern is a slight hack to get around missing Shortest/Longest.
+        return self._apply(string, rule, n, evaluation, options, True)
+
+
+class StringExpression(BinaryOperator):
+    """
+    <dl>
+    <dt>'StringExpression[s_1, s_2, ...]'
+      <dd>represents a sequence of strings and symbolic string objects $s_i$.
+    </dl>
+
+    >> "a" ~~ "b" // FullForm
+     = "ab"
+
+    #> "a" ~~ "b" ~~ "c" // FullForm
+     = "abc"
+
+    #> a ~~ b
+     = a ~~ b
+    """
+
+    operator = "~~"
+    precedence = 135
+    attributes = ("Flat", "OneIdentity", "Protected")
+
+    messages = {
+        "invld": "Element `1` is not a valid string or pattern element in `2`.",
+        "cond": "Ignored restriction given for `1` in `2` as it does not match previous occurences of `1`.",
+    }
+
+    def apply(self, args, evaluation):
+        "StringExpression[args__String]"
+        args = args.get_sequence()
+        args = [arg.get_string_value() for arg in args]
+        if None in args:
+            return
+        return String("".join(args))
+
+class StringFreeQ(Builtin):
+    """
+    <dl>
+    <dt>'StringFreeQ["$string$", $patt$]'
+        <dd>returns True if no substring in $string$ matches the string expression $patt$, and returns False otherwise.
+    <dt>'StringFreeQ[{"s1", "s2", ...}, patt]'
+        <dd>returns the list of results for each element of string list.
+    <dt>'StringFreeQ["string", {p1, p2, ...}]'
+        <dd>returns True if no substring matches any of the $pi$.
+    <dt>'StringFreeQ[patt]'
+        <dd>represents an operator form of StringFreeQ that can be applied to an expression.
+    </dl>
+
+    >> StringFreeQ["mathics", "m" ~~ __ ~~ "s"]
+     = False
+
+    >> StringFreeQ["mathics", "a" ~~ __ ~~ "m"]
+     = True
+
+    #> StringFreeQ["Hello", "o"]
+     = False
+
+    #> StringFreeQ["a"]["abcd"]
+     = False
+
+    #> StringFreeQ["Mathics", "ma", IgnoreCase -> False]
+     = True
+
+    >> StringFreeQ["Mathics", "MA" , IgnoreCase -> True]
+     = False
+
+    #> StringFreeQ["", "Empty String"]
+     = True
+
+    #> StringFreeQ["", ___]
+     = False
+
+    #> StringFreeQ["Empty Pattern", ""]
+     = False
+
+    #> StringFreeQ[notastring, "n"]
+     : String or list of strings expected at position 1 in StringFreeQ[notastring, n].
+     = StringFreeQ[notastring, n]
+
+    #> StringFreeQ["Welcome", notapattern]
+     : Element notapattern is not a valid string or pattern element in notapattern.
+     = StringFreeQ[Welcome, notapattern]
+
+    >> StringFreeQ[{"g", "a", "laxy", "universe", "sun"}, "u"]
+     = {True, True, True, False, False}
+
+    #> StringFreeQ[{}, "list of string is empty"]
+     = {}
+
+    >> StringFreeQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"}
+     = {False, False, False, True, True, True, True, True, False}
+
+    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
+     = {True, True, False, False, True}
+
+    >> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True]
+     = {True, True, False, False, False}
+
+    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {}]
+     = {True, True, True, True, True}
+
+    #> StringFreeQ[{"A", Galaxy, "Far", "Far", Away}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
+     : String or list of strings expected at position 1 in StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}].
+     = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]
+
+    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {F ~~ __ ~~ "r", aw ~~ ___}]
+     : Element F ~~ __ ~~ r is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
+     = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]
+    ## Mathematica can determine the actual invalid element in the pattern; it reports the error:
+    ## Element F is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
+    """
+
+    options = {
+        "IgnoreCase": "False",
+    }
+
+    rules = {
+        "StringFreeQ[patt_][expr_]": "StringFreeQ[expr, patt]",
+    }
+
+    messages = {
+        "strse": "String or list of strings expected at position `1` in `2`.",
+    }
+
+    def apply(self, string, patt, evaluation, options):
+        "StringFreeQ[string_, patt_, OptionsPattern[%(name)s]]"
+        return _pattern_search(
+            self.__class__.__name__, string, patt, evaluation, options, False
+        )
+
 class StringMatchQ(Builtin):
     r"""
     >> StringMatchQ["abc", "abc"]
@@ -117,3 +356,42 @@ def apply(self, string, patt, evaluation, options):
             return SymbolFalse
         else:
             return SymbolTrue
+
+
+class WhitespaceCharacter(Builtin):
+    r"""
+    <dl>
+    <dt>'WhitespaceCharacter'
+      <dd>represents a single whitespace character.
+    </dl>
+
+    >> StringMatchQ["\n", WhitespaceCharacter]
+     = True
+
+    >> StringSplit["a\nb\r\nc\rd", WhitespaceCharacter]
+     = {a, b, c, d}
+
+    For sequences of whitespace characters use 'Whitespace':
+    >> StringMatchQ[" \n", WhitespaceCharacter]
+     = False
+    >> StringMatchQ[" \n", Whitespace]
+     = True
+    """
+
+
+class WordCharacter(Builtin):
+    r"""
+    <dl>
+    <dt>'WordCharacter'
+      <dd>represents a single letter or digit character.
+    </dl>
+
+    >> StringMatchQ[#, WordCharacter] &/@ {"1", "a", "A", ",", " "}
+     = {True, True, True, False, False}
+
+    Test whether a string is alphanumeric:
+    >> StringMatchQ["abc123DEF", WordCharacter..]
+     = True
+    >> StringMatchQ["$b;123", WordCharacter..]
+     = False
+    """
diff --git a/mathics/builtin/string/regexp.py b/mathics/builtin/string/regexp.py
new file mode 100644
index 000000000..dfb3d8e0d
--- /dev/null
+++ b/mathics/builtin/string/regexp.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+"""
+Regular Expressions
+"""
+
+from mathics.version import __version__  # noqa used in loading to check consistency.
+
+from mathics.builtin.base import Builtin
+
+# builtin.strings.to_regex seems to have the implementation.
+class RegularExpression(Builtin):
+    r"""
+    <dl>
+    <dt>'RegularExpression["regex"]'
+      <dd>represents the regex specified by the string $"regex"$.
+    </dl>
+
+    >> StringSplit["1.23, 4.56  7.89", RegularExpression["(\\s|,)+"]]
+     = {1.23, 4.56, 7.89}
+
+    #> RegularExpression["[abc]"]
+     = RegularExpression[[abc]]
+
+    ## Mathematica doesn't seem to verify the correctness of regex
+    #> StringSplit["ab23c", RegularExpression["[0-9]++"]]
+     : Element RegularExpression[[0-9]++] is not a valid string or pattern element in RegularExpression[[0-9]++].
+     = StringSplit[ab23c, RegularExpression[[0-9]++]]
+
+    #> StringSplit["ab23c", RegularExpression[2]]
+     : Element RegularExpression[2] is not a valid string or pattern element in RegularExpression[2].
+     = StringSplit[ab23c, RegularExpression[2]]
+    """
diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py
index 78a5ddfd2..227c6c963 100644
--- a/mathics/builtin/strings.py
+++ b/mathics/builtin/strings.py
@@ -5,7 +5,6 @@
 
 import io
 import re
-import sys
 import unicodedata
 from binascii import hexlify, unhexlify
 from heapq import heappush, heappop
@@ -14,7 +13,6 @@
 from mathics.version import __version__  # noqa used in loading to check consistency.
 
 from mathics.builtin.base import (
-    BinaryOperator,
     Builtin,
     Test,
     Predefined,
@@ -31,7 +29,6 @@
     Integer,
     Integer0,
     Integer1,
-    string_list,
 )
 from mathics.core.parser import MathicsFileLineFeeder, parse
 from mathics.settings import SYSTEM_CHARACTER_ENCODING
@@ -381,65 +378,6 @@ class CharacterEncodings(Predefined):
     }
 
 
-class StringExpression(BinaryOperator):
-    """
-    <dl>
-    <dt>'StringExpression[s_1, s_2, ...]'
-      <dd>represents a sequence of strings and symbolic string objects $s_i$.
-    </dl>
-
-    >> "a" ~~ "b" // FullForm
-     = "ab"
-
-    #> "a" ~~ "b" ~~ "c" // FullForm
-     = "abc"
-
-    #> a ~~ b
-     = a ~~ b
-    """
-
-    operator = "~~"
-    precedence = 135
-    attributes = ("Flat", "OneIdentity", "Protected")
-
-    messages = {
-        "invld": "Element `1` is not a valid string or pattern element in `2`.",
-        "cond": "Ignored restriction given for `1` in `2` as it does not match previous occurences of `1`.",
-    }
-
-    def apply(self, args, evaluation):
-        "StringExpression[args__String]"
-        args = args.get_sequence()
-        args = [arg.get_string_value() for arg in args]
-        if None in args:
-            return
-        return String("".join(args))
-
-
-class RegularExpression(Builtin):
-    r"""
-    <dl>
-    <dt>'RegularExpression["regex"]'
-      <dd>represents the regex specified by the string $"regex"$.
-    </dl>
-
-    >> StringSplit["1.23, 4.56  7.89", RegularExpression["(\\s|,)+"]]
-     = {1.23, 4.56, 7.89}
-
-    #> RegularExpression["[abc]"]
-     = RegularExpression[[abc]]
-
-    ## Mathematica doesn't seem to verify the correctness of regex
-    #> StringSplit["ab23c", RegularExpression["[0-9]++"]]
-     : Element RegularExpression[[0-9]++] is not a valid string or pattern element in RegularExpression[[0-9]++].
-     = StringSplit[ab23c, RegularExpression[[0-9]++]]
-
-    #> StringSplit["ab23c", RegularExpression[2]]
-     : Element RegularExpression[2] is not a valid string or pattern element in RegularExpression[2].
-     = StringSplit[ab23c, RegularExpression[2]]
-    """
-
-
 class NumberString(Builtin):
     """
     <dl>
@@ -458,28 +396,6 @@ class NumberString(Builtin):
     """
 
 
-class DigitCharacter(Builtin):
-    """
-    <dl>
-    <dt>'DigitCharacter'
-      <dd>represents the digits 0-9.
-    </dl>
-
-    >> StringMatchQ["1", DigitCharacter]
-     = True
-    >> StringMatchQ["a", DigitCharacter]
-     = False
-    >> StringMatchQ["12", DigitCharacter]
-     = False
-
-    >> StringMatchQ["123245", DigitCharacter..]
-     = True
-
-    #> StringMatchQ["123245a6", DigitCharacter..]
-     = False
-    """
-
-
 class Whitespace(Builtin):
     r"""
     <dl>
@@ -498,45 +414,6 @@ class Whitespace(Builtin):
     """
 
 
-class WhitespaceCharacter(Builtin):
-    r"""
-    <dl>
-    <dt>'WhitespaceCharacter'
-      <dd>represents a single whitespace character.
-    </dl>
-
-    >> StringMatchQ["\n", WhitespaceCharacter]
-     = True
-
-    >> StringSplit["a\nb\r\nc\rd", WhitespaceCharacter]
-     = {a, b, c, d}
-
-    For sequences of whitespace characters use 'Whitespace':
-    >> StringMatchQ[" \n", WhitespaceCharacter]
-     = False
-    >> StringMatchQ[" \n", Whitespace]
-     = True
-    """
-
-
-class WordCharacter(Builtin):
-    r"""
-    <dl>
-    <dt>'WordCharacter'
-      <dd>represents a single letter or digit character.
-    </dl>
-
-    >> StringMatchQ[#, WordCharacter] &/@ {"1", "a", "A", ",", " "}
-     = {True, True, True, False, False}
-
-    Test whether a string is alphanumeric:
-    >> StringMatchQ["abc123DEF", WordCharacter..]
-     = True
-    >> StringMatchQ["$b;123", WordCharacter..]
-     = False
-    """
-
-
 class StartOfString(Builtin):
     r"""
     <dl>
@@ -623,22 +500,6 @@ class WordBoundary(Builtin):
     """
 
 
-class LetterCharacter(Builtin):
-    """
-    <dl>
-    <dt>'LetterCharacter'
-      <dd>represents letters.
-    </dl>
-
-    >> StringMatchQ[#, LetterCharacter] & /@ {"a", "1", "A", " ", "."}
-     = {True, False, True, False, False}
-
-    LetterCharacter also matches unicode characters.
-    >> StringMatchQ["\\[Lambda]", LetterCharacter]
-     = True
-    """
-
-
 # FIXME: Generalize string.lower() and ord()
 def letter_number(chars: List[str], start_ord) -> List["Integer"]:
     # Note caller has verified that everything isalpha() and
@@ -810,111 +671,6 @@ class HexidecimalCharacter(Builtin):
     """
 
 
-class StringSplit(Builtin):
-    """
-    <dl>
-    <dt>'StringSplit["$s$"]'
-        <dd>splits the string $s$ at whitespace, discarding the
-        whitespace and returning a list of strings.
-    <dt>'StringSplit["$s$", "$d$"]'
-        <dd>splits $s$ at the delimiter $d$.
-    <dt>'StringSplit[$s$, {"$d1$", "$d2$", ...}]'
-        <dd>splits $s$ using multiple delimiters.
-    <dt>'StringSplit[{$s_1$, $s_2, ...}, {"$d1$", "$d2$", ...}]'
-        <dd>returns a list with the result of applying the function to
-            each element.
-    </dl>
-
-    >> StringSplit["abc,123", ","]
-     = {abc, 123}
-
-    >> StringSplit["abc 123"]
-     = {abc, 123}
-
-    #> StringSplit["  abc    123  "]
-     = {abc, 123}
-
-    >> StringSplit["abc,123.456", {",", "."}]
-     = {abc, 123, 456}
-
-    >> StringSplit["a  b    c", RegularExpression[" +"]]
-     = {a, b, c}
-
-    >> StringSplit[{"a  b", "c  d"}, RegularExpression[" +"]]
-     = {{a, b}, {c, d}}
-
-    #> StringSplit["x", "x"]
-     = {}
-
-    #> StringSplit[x]
-     : String or list of strings expected at position 1 in StringSplit[x].
-     = StringSplit[x, Whitespace]
-
-    #> StringSplit["x", x]
-     : Element x is not a valid string or pattern element in x.
-     = StringSplit[x, x]
-
-    #> StringSplit["12312123", "12"..]
-     = {3, 3}
-
-    #> StringSplit["abaBa", "b"]
-     = {a, aBa}
-    #> StringSplit["abaBa", "b", IgnoreCase -> True]
-     = {a, a, a}
-    """
-
-    rules = {
-        "StringSplit[s_]": "StringSplit[s, Whitespace]",
-    }
-
-    options = {
-        "IgnoreCase": "False",
-        "MetaCharacters": "None",
-    }
-
-    messages = {
-        "strse": "String or list of strings expected at position `1` in `2`.",
-        "pysplit": "As of Python 3.5 re.split does not handle empty pattern matches.",
-    }
-
-    def apply(self, string, patt, evaluation, options):
-        "StringSplit[string_, patt_, OptionsPattern[%(name)s]]"
-
-        if string.get_head_name() == "System`List":
-            leaves = [self.apply(s, patt, evaluation, options) for s in string._leaves]
-            return Expression(SymbolList, *leaves)
-
-        py_string = string.get_string_value()
-
-        if py_string is None:
-            return evaluation.message(
-                "StringSplit", "strse", Integer1, Expression("StringSplit", string)
-            )
-
-        if patt.has_form("List", None):
-            patts = patt.get_leaves()
-        else:
-            patts = [patt]
-        re_patts = []
-        for p in patts:
-            py_p = to_regex(p, evaluation)
-            if py_p is None:
-                return evaluation.message("StringExpression", "invld", p, patt)
-            re_patts.append(py_p)
-
-        flags = re.MULTILINE
-        if options["System`IgnoreCase"] == SymbolTrue:
-            flags = flags | re.IGNORECASE
-
-        result = [py_string]
-        for re_patt in re_patts:
-            result = [t for s in result for t in mathics_split(re_patt, s, flags=flags)]
-
-        return string_list(
-            SymbolList, [String(x) for x in result if x != ""], evaluation
-        )
-
-
 class _StringFind(Builtin):
     attributes = "Protected"
 
@@ -1003,73 +759,6 @@ def convert_rule(r):
             return self._find(py_strings, py_rules, py_n, flags, evaluation)
 
 
-class StringCases(_StringFind):
-    """
-    <dl>
-    <dt>'StringCases["$string$", $pattern$]'
-        <dd>gives all occurences of $pattern$ in $string$.
-    <dt>'StringReplace["$string$", $pattern$ -> $form$]'
-        <dd>gives all instances of $form$ that stem from occurences of $pattern$ in $string$.
-    <dt>'StringCases["$string$", {$pattern1$, $pattern2$, ...}]'
-        <dd>gives all occurences of $pattern1$, $pattern2$, ....
-    <dt>'StringReplace["$string$", $pattern$, $n$]'
-        <dd>gives only the first $n$ occurences.
-    <dt>'StringReplace[{"$string1$", "$string2$", ...}, $pattern$]'
-        <dd>gives occurences in $string1$, $string2$, ...
-    </dl>
-
-    >> StringCases["axbaxxb", "a" ~~ x_ ~~ "b"]
-     = {axb}
-
-    >> StringCases["axbaxxb", "a" ~~ x__ ~~ "b"]
-     = {axbaxxb}
-
-    >> StringCases["axbaxxb", Shortest["a" ~~ x__ ~~ "b"]]
-     = {axb, axxb}
-
-    >> StringCases["-abc- def -uvw- xyz", Shortest["-" ~~ x__ ~~ "-"] -> x]
-     = {abc, uvw}
-
-    >> StringCases["-öhi- -abc- -.-", "-" ~~ x : WordCharacter .. ~~ "-" -> x]
-     = {öhi, abc}
-
-    >> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x_] -> x]
-     = {abc}
-
-    #> StringCases["abc-abc xyz-uvw", Shortest[x : WordCharacter .. ~~ "-" ~~ x : LetterCharacter] -> x]
-     : Ignored restriction given for x in x : LetterCharacter as it does not match previous occurences of x.
-     = {abc}
-
-    >> StringCases["abba", {"a" -> 10, "b" -> 20}, 2]
-     = {10, 20}
-
-    >> StringCases["a#ä_123", WordCharacter]
-     = {a, ä, 1, 2, 3}
-
-    >> StringCases["a#ä_123", LetterCharacter]
-     = {a, ä}
-    """
-
-    rules = {
-        "StringCases[rule_][string_]": "StringCases[string, rule]",
-    }
-
-    def _find(self, py_stri, py_rules, py_n, flags, evaluation):
-        def cases():
-            for match, form in _parallel_match(py_stri, py_rules, flags, py_n):
-                if form is None:
-                    yield String(match.group(0))
-                else:
-                    yield _evaluate_match(form, match, evaluation)
-
-        return Expression(SymbolList, *list(cases()))
-
-    def apply(self, string, rule, n, evaluation, options):
-        "%(name)s[string_, rule_, OptionsPattern[%(name)s], n_:System`Private`Null]"
-        # this pattern is a slight hack to get around missing Shortest/Longest.
-        return self._apply(string, rule, n, evaluation, options, True)
-
-
 class StringRepeat(Builtin):
     """
     <dl>
@@ -1670,53 +1359,6 @@ def apply(self, s, evaluation):
         return String(unidecode(s.get_string_value()))
 
 
-class StringTrim(Builtin):
-    """
-    <dl>
-    <dt>'StringTrim[$s$]'
-        <dd>returns a version of $s$ with whitespace removed from start and end.
-    </dl>
-
-    >> StringJoin["a", StringTrim["  \\tb\\n "], "c"]
-     = abc
-
-    >> StringTrim["ababaxababyaabab", RegularExpression["(ab)+"]]
-     = axababya
-    """
-
-    def apply(self, s, evaluation):
-        "StringTrim[s_String]"
-        return String(s.get_string_value().strip(" \t\n"))
-
-    def apply_pattern(self, s, patt, expression, evaluation):
-        "StringTrim[s_String, patt_]"
-        text = s.get_string_value()
-        if not text:
-            return s
-
-        py_patt = to_regex(patt, evaluation)
-        if py_patt is None:
-            return evaluation.message("StringExpression", "invld", patt, expression)
-
-        if not py_patt.startswith(r"\A"):
-            left_patt = r"\A" + py_patt
-        else:
-            left_patt = py_patt
-
-        if not py_patt.endswith(r"\Z"):
-            right_patt = py_patt + r"\Z"
-        else:
-            right_patt = py_patt
-
-        m = re.search(left_patt, text)
-        left = m.end(0) if m else 0
-
-        m = re.search(right_patt, text)
-        right = m.start(0) if m else len(text)
-
-        return String(text[left:right])
-
-
 def _pattern_search(name, string, patt, evaluation, options, matched):
     # Get the pattern list and check validity for each
     if patt.has_form("List", None):
@@ -1851,227 +1493,3 @@ def apply(self, string, patt, evaluation, options):
         return _pattern_search(
             self.__class__.__name__, string, patt, evaluation, options, True
         )
-
-
-class StringFreeQ(Builtin):
-    """
-    <dl>
-    <dt>'StringFreeQ["$string$", $patt$]'
-        <dd>returns True if no substring in $string$ matches the string expression $patt$, and returns False otherwise.
-    <dt>'StringFreeQ[{"s1", "s2", ...}, patt]'
-        <dd>returns the list of results for each element of string list.
-    <dt>'StringFreeQ["string", {p1, p2, ...}]'
-        <dd>returns True if no substring matches any of the $pi$.
-    <dt>'StringFreeQ[patt]'
-        <dd>represents an operator form of StringFreeQ that can be applied to an expression.
-    </dl>
-
-    >> StringFreeQ["mathics", "m" ~~ __ ~~ "s"]
-     = False
-
-    >> StringFreeQ["mathics", "a" ~~ __ ~~ "m"]
-     = True
-
-    #> StringFreeQ["Hello", "o"]
-     = False
-
-    #> StringFreeQ["a"]["abcd"]
-     = False
-
-    #> StringFreeQ["Mathics", "ma", IgnoreCase -> False]
-     = True
-
-    >> StringFreeQ["Mathics", "MA" , IgnoreCase -> True]
-     = False
-
-    #> StringFreeQ["", "Empty String"]
-     = True
-
-    #> StringFreeQ["", ___]
-     = False
-
-    #> StringFreeQ["Empty Pattern", ""]
-     = False
-
-    #> StringFreeQ[notastring, "n"]
-     : String or list of strings expected at position 1 in StringFreeQ[notastring, n].
-     = StringFreeQ[notastring, n]
-
-    #> StringFreeQ["Welcome", notapattern]
-     : Element notapattern is not a valid string or pattern element in notapattern.
-     = StringFreeQ[Welcome, notapattern]
-
-    >> StringFreeQ[{"g", "a", "laxy", "universe", "sun"}, "u"]
-     = {True, True, True, False, False}
-
-    #> StringFreeQ[{}, "list of string is empty"]
-     = {}
-
-    >> StringFreeQ["e" ~~ ___ ~~ "u"] /@ {"The Sun", "Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"}
-     = {False, False, False, True, True, True, True, True, False}
-
-    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
-     = {True, True, False, False, True}
-
-    >> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}, IgnoreCase -> True]
-     = {True, True, False, False, False}
-
-    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {}]
-     = {True, True, True, True, True}
-
-    #> StringFreeQ[{"A", Galaxy, "Far", "Far", Away}, {"F" ~~ __ ~~ "r", "aw" ~~ ___}]
-     : String or list of strings expected at position 1 in StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}].
-     = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]
-
-    #> StringFreeQ[{"A", "Galaxy", "Far", "Far", "Away"}, {F ~~ __ ~~ "r", aw ~~ ___}]
-     : Element F ~~ __ ~~ r is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
-     = StringFreeQ[{A, Galaxy, Far, Far, Away}, {F ~~ __ ~~ r, aw ~~ ___}]
-    ## Mathematica can detemine correct invalid element in the pattern, it reports error:
-    ## Element F is not a valid string or pattern element in {F ~~ __ ~~ r, aw ~~ ___}.
-    """
-
-    options = {
-        "IgnoreCase": "False",
-    }
-
-    rules = {
-        "StringFreeQ[patt_][expr_]": "StringFreeQ[expr, patt]",
-    }
-
-    messages = {
-        "strse": "String or list of strings expected at position `1` in `2`.",
-    }
-
-    def apply(self, string, patt, evaluation, options):
-        "StringFreeQ[string_, patt_, OptionsPattern[%(name)s]]"
-        return _pattern_search(
-            self.__class__.__name__, string, patt, evaluation, options, False
-        )
-
-
-class StringRiffle(Builtin):
-    """
-    <dl>
-    <dt>'StringRiffle[{s1, s2, s3, ...}]'
-      <dd>returns a new string by concatenating all the $si$, with spaces inserted between them.
-    <dt>'StringRiffle[list, sep]'
-      <dd>inserts the separator $sep$ between all elements in $list$.
-    <dt>'StringRiffle[list, {"left", "sep", "right"}]'
-      <dd>use $left$ and $right$ as delimiters after concatenation.
-
-    ## These 2 forms are not currently implemented
-    ## <dt>'StringRiffle[{{s11, s12, ...}, {s21, s22, ...}, ...}]'
-    ##   <dd>returns a new string by concatenating the $sij$, and inserting spaces at the lowest level and newlines at the higher level.
-    ## <dt>'StringRiffle[list, sep1, sep2, ...]'
-    ##   <dd>inserts separator $sepi$ between elements of list at level i.
-    </dl>
-
-    >> StringRiffle[{"a", "b", "c", "d", "e"}]
-     = a b c d e
-
-    #> StringRiffle[{a, b, c, "d", e, "f"}]
-     = a b c d e f
-
-    ## 1st is not a list
-    #> StringRiffle["abcdef"]
-     : List expected at position 1 in StringRiffle[abcdef].
-     : StringRiffle called with 1 argument; 2 or more arguments are expected.
-     = StringRiffle[abcdef]
-
-    #> StringRiffle[{"", "", ""}] // FullForm
-     = "  "
-
-    ## This form is not supported
-    #> StringRiffle[{{"a", "b"}, {"c", "d"}}]
-     : Sublist form in position 1 is is not implemented yet.
-     = StringRiffle[{{a, b}, {c, d}}]
-
-    >> StringRiffle[{"a", "b", "c", "d", "e"}, ", "]
-     = a, b, c, d, e
-
-    #> StringRiffle[{"a", "b", "c", "d", "e"}, sep]
-     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, sep].
-     = StringRiffle[{a, b, c, d, e}, sep]
-
-    >> StringRiffle[{"a", "b", "c", "d", "e"}, {"(", " ", ")"}]
-     = (a b c d e)
-
-    #> StringRiffle[{"a", "b", "c", "d", "e"}, {" ", ")"}]
-     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, { , )}].
-     = StringRiffle[{a, b, c, d, e}, { , )}]
-    #> StringRiffle[{"a", "b", "c", "d", "e"}, {left, " ", "."}]
-     : String expected at position 2 in StringRiffle[{a, b, c, d, e}, {left,  , .}].
-     = StringRiffle[{a, b, c, d, e}, {left,  , .}]
-
-    ## This form is not supported
-    #> StringRiffle[{"a", "b", "c"}, "+", "-"]
-    ## Mathematica result: a+b+c, but we are not support multiple separators
-     :  Multiple separators form is not implemented yet.
-     = StringRiffle[{a, b, c}, +, -]
-    """
-
-    attributes = ("ReadProtected",)
-
-    messages = {
-        "list": "List expected at position `1` in `2`.",
-        "argmu": "StringRiffle called with 1 argument; 2 or more arguments are expected.",
-        "argm": "StringRiffle called with 0 arguments; 2 or more arguments are expected.",
-        "string": "String expected at position `1` in `2`.",
-        "sublist": "Sublist form in position 1 is is not implemented yet.",
-        "mulsep": "Multiple separators form is not implemented yet.",
-    }
-
-    def apply(self, liststr, seps, evaluation):
-        "StringRiffle[liststr_, seps___]"
-        separators = seps.get_sequence()
-        exp = (
-            Expression("StringRiffle", liststr, seps)
-            if separators
-            else Expression("StringRiffle", liststr)
-        )
-
-        # Validate separators
-        if len(separators) > 1:
-            return evaluation.message("StringRiffle", "mulsep")
-        elif len(separators) == 1:
-            if separators[0].has_form("List", None):
-                if len(separators[0].leaves) != 3 or any(
-                    not isinstance(s, String) for s in separators[0].leaves
-                ):
-                    return evaluation.message("StringRiffle", "string", Integer(2), exp)
-            elif not isinstance(separators[0], String):
-                return evaluation.message("StringRiffle", "string", Integer(2), exp)
-
-        # Validate list of string
-        if not liststr.has_form("List", None):
-            evaluation.message("StringRiffle", "list", Integer1, exp)
-            return evaluation.message("StringRiffle", "argmu", exp)
-        elif any(leaf.has_form("List", None) for leaf in liststr.leaves):
-            return evaluation.message("StringRiffle", "sublist")
-
-        # Determine the separation token
-        left, right = "", ""
-        if len(separators) == 0:
-            sep = " "
-        else:
-            if separators[0].has_form("List", None):
-                left = separators[0].leaves[0].value
-                sep = separators[0].leaves[1].value
-                right = separators[0].leaves[2].value
-            else:
-                sep = separators[0].get_string_value()
-
-        # Getting all together
-        result = left
-        for i in range(len(liststr.leaves)):
-            text = (
-                liststr.leaves[i]
-                .format(evaluation, "System`OutputForm")
-                .boxes_to_text(evaluation=evaluation)
-            )
-            if i == len(liststr.leaves) - 1:
-                result += text + right
-            else:
-                result += text + sep
-
-        return String(result)

From 3b76785e96217d0865d1db6d743a720931e27279 Mon Sep 17 00:00:00 2001
From: autoblack <rocky@users.noreply.github.com>
Date: Wed, 30 Jun 2021 01:45:08 +0000
Subject: [PATCH 4/7] fixup: Format Python code with Black

---
 mathics/builtin/__init__.py          | 10 +++++++++-
 mathics/builtin/string/charcodes.py  |  8 ++++----
 mathics/builtin/string/operations.py |  1 +
 mathics/builtin/string/patterns.py   | 10 +++++-----
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/mathics/builtin/__init__.py b/mathics/builtin/__init__.py
index dd271b8f5..bc1db2e7f 100755
--- a/mathics/builtin/__init__.py
+++ b/mathics/builtin/__init__.py
@@ -153,7 +153,15 @@ def is_builtin(var):
     [] if ENABLE_FILES_MODULE else ["files_io.files", "files_io.importexport"]
 )
 
-for subdir in ("colors", "drawing", "files_io", "numbers", "specialfns", "string", "fileformats"):
+for subdir in (
+    "colors",
+    "drawing",
+    "files_io",
+    "numbers",
+    "specialfns",
+    "string",
+    "fileformats",
+):
     import_name = f"{__name__}.{subdir}"
 
     if import_name in disable_file_module_names:
diff --git a/mathics/builtin/string/charcodes.py b/mathics/builtin/string/charcodes.py
index 6010fa831..be6d1ec49 100644
--- a/mathics/builtin/string/charcodes.py
+++ b/mathics/builtin/string/charcodes.py
@@ -15,17 +15,17 @@
     SymbolList,
 )
 
-from mathics.builtin.strings import (
-    _encodings,
-    to_python_encoding
-    )
+from mathics.builtin.strings import _encodings, to_python_encoding
+
 
 def pack_bytes(codes):
     return bytes(codes)
 
+
 def unpack_bytes(codes):
     return [int(code) for code in codes]
 
+
 class ToCharacterCode(Builtin):
     u"""
     <dl>
diff --git a/mathics/builtin/string/operations.py b/mathics/builtin/string/operations.py
index b07db6247..e67499c80 100644
--- a/mathics/builtin/string/operations.py
+++ b/mathics/builtin/string/operations.py
@@ -1014,6 +1014,7 @@ def apply_strings(self, strings, spec, evaluation):
             result_list.append(result)
         return Expression("List", *result_list)
 
+
 class StringTrim(Builtin):
     """
     <dl>
diff --git a/mathics/builtin/string/patterns.py b/mathics/builtin/string/patterns.py
index 10a1c0468..7a96342e8 100644
--- a/mathics/builtin/string/patterns.py
+++ b/mathics/builtin/string/patterns.py
@@ -7,17 +7,14 @@
 
 from mathics.version import __version__  # noqa used in loading to check consistency.
 
-from mathics.builtin.base import (
-    BinaryOperator,
-    Builtin
-)
+from mathics.builtin.base import BinaryOperator, Builtin
 
 from mathics.core.expression import (
     Expression,
     Integer1,
     SymbolFalse,
     SymbolTrue,
-    )
+)
 
 
 from mathics.builtin.strings import (
@@ -26,6 +23,7 @@
     to_regex,
 )
 
+
 class DigitCharacter(Builtin):
     """
     <dl>
@@ -165,6 +163,7 @@ def apply(self, args, evaluation):
             return
         return String("".join(args))
 
+
 class StringFreeQ(Builtin):
     """
     <dl>
@@ -260,6 +259,7 @@ def apply(self, string, patt, evaluation, options):
             self.__class__.__name__, string, patt, evaluation, options, False
         )
 
+
 class StringMatchQ(Builtin):
     r"""
     >> StringMatchQ["abc", "abc"]

From 2cc98251481f3f194934e88dd810e33640b7989f Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 29 Jun 2021 22:29:12 -0400
Subject: [PATCH 5/7] Add distance and similarity

---
 mathics/builtin/__init__.py                |  13 +-
 mathics/builtin/colors/color_directives.py |  31 ++-
 mathics/builtin/distance/stringdata.py     | 270 +++++++++++++++++++++
 mathics/builtin/strings.py                 | 257 +-------------------
 4 files changed, 301 insertions(+), 270 deletions(-)
 create mode 100644 mathics/builtin/distance/stringdata.py

diff --git a/mathics/builtin/__init__.py b/mathics/builtin/__init__.py
index dd271b8f5..6305c8fad 100755
--- a/mathics/builtin/__init__.py
+++ b/mathics/builtin/__init__.py
@@ -40,7 +40,7 @@ def add_builtins(new_builtins):
         if isinstance(builtin, SympyObject):
             mathics_to_sympy[name] = builtin
             for sympy_name in builtin.get_sympy_names():
-                ### print("XXX1", sympy_name)
+                # print("XXX1", sympy_name)
                 sympy_to_mathics[sympy_name] = builtin
         if isinstance(builtin, Operator):
             builtins_precedence[name] = builtin.precedence
@@ -153,7 +153,16 @@ def is_builtin(var):
     [] if ENABLE_FILES_MODULE else ["files_io.files", "files_io.importexport"]
 )
 
-for subdir in ("colors", "drawing", "files_io", "numbers", "specialfns", "string", "fileformats"):
+for subdir in (
+    "colors",
+    "distance",
+    "drawing",
+    "files_io",
+    "numbers",
+    "specialfns",
+    "string",
+    "fileformats",
+):
     import_name = f"{__name__}.{subdir}"
 
     if import_name in disable_file_module_names:
diff --git a/mathics/builtin/colors/color_directives.py b/mathics/builtin/colors/color_directives.py
index 5c475a312..a24a66e4a 100644
--- a/mathics/builtin/colors/color_directives.py
+++ b/mathics/builtin/colors/color_directives.py
@@ -1,5 +1,7 @@
 """
 Color Directives
+
+There are many different ways to specify color; we support all of the color formats below and will convert between the different color formats.
 """
 
 from math import atan2, cos, exp, pi, radians, sin, sqrt
@@ -225,24 +227,27 @@ class CMYKColor(_Color):
 class ColorDistance(Builtin):
     """
     <dl>
-    <dt>'ColorDistance[$c1$, $c2$]'
-        <dd>returns a measure of color distance between the colors $c1$ and $c2$.
-    <dt>'ColorDistance[$list$, $c2$]'
-        <dd>returns a list of color distances between the colors in $list$ and $c2$.
+      <dt>'ColorDistance[$c1$, $c2$]'
+      <dd>returns a measure of color distance between the colors $c1$ and $c2$.
+
+      <dt>'ColorDistance[$list$, $c2$]'
+      <dd>returns a list of color distances between the colors in $list$ and $c2$.
     </dl>
 
     The option DistanceFunction specifies the method used to measure the color
     distance. Available options are:
 
-    CIE76: euclidean distance in the LABColor space
-    CIE94: euclidean distance in the LCHColor space
-    CIE2000 or CIEDE2000: CIE94 distance with corrections
-    CMC: Colour Measurement Committee metric (1984)
-    DeltaL: difference in the L component of LCHColor
-    DeltaC: difference in the C component of LCHColor
-    DeltaH: difference in the H component of LCHColor
+    <ul>
+      <li>CIE76: Euclidean distance in the LABColor space
+      <li>CIE94: Euclidean distance in the LCHColor space
+      <li>CIE2000 or CIEDE2000: CIE94 distance with corrections
+      <li>CMC: Color Measurement Committee metric (1984)
+      <li>DeltaL: difference in the L component of LCHColor
+      <li>DeltaC: difference in the C component of LCHColor
+      <li>DeltaH: difference in the H component of LCHColor
+    </ul>
 
-    It is also possible to specify a custom distance
+    It is also possible to specify a custom distance.
 
     >> ColorDistance[Magenta, Green]
      = 2.2507
@@ -374,7 +379,7 @@ def compute(a, b):
                     ),
                 )
 
-        if compute == None:
+        if compute is None:
             evaluation.message("ColorDistance", "invdist", distance_function)
             return
 
diff --git a/mathics/builtin/distance/stringdata.py b/mathics/builtin/distance/stringdata.py
new file mode 100644
index 000000000..b4ac85ee7
--- /dev/null
+++ b/mathics/builtin/distance/stringdata.py
@@ -0,0 +1,270 @@
+# -*- coding: utf-8 -*-
+"""
+String Distances and Similarity Measures
+"""
+
+import unicodedata
+
+from typing import Callable
+
+from mathics.version import __version__  # noqa used in loading to check consistency.
+
+from mathics.builtin.base import Builtin
+
+from mathics.core.expression import (
+    Expression,
+    Integer,
+    String,
+    SymbolTrue,
+)
+
+
+# Levenshtein's algorithm is defined by the following construction:
+# (adapted from https://de.wikipedia.org/wiki/Levenshtein-Distanz)
+#
+# given two strings s1, s2, we build a matrix D sized (len(s1) + 1,
+# len(s2) + 1) and fill it using the following rules:
+#
+# (1) D(0, 0) = 0
+# (2) D(i, 0) = i, 1 <= i <= len(s1)
+# (3) D(0, j) = j, 1 <= j <= len(s2)
+# (4) D(i, j) = minimum of
+#     D(i - 1, j - 1) + 0 if s1(i) = s2(j)
+#     D(i - 1, j - 1) + 1 (substitution)
+#     D(i, j - 1) + 1     (insertion)
+#     D(i - 1, j) + 1     (deletion)
+#
+# The computed distance will be in D(len(s1), len(s2)).
+#
+# note: double brackets indicate 1-based indices below, e.g. s1[[1]]
+
+def _one_based(l):  # makes an enumerated generator 1-based
+    return ((i + 1, x) for i, x in l)
+
+
+def _prev_curr(l):  # yields pairs of (x[i - 1], x[i]) for i in 1, 2, ...
+    prev = None
+    for curr in l:
+        yield prev, curr
+        prev = curr
+
+
+def _levenshtein_d0(s2):  # compute D(0, ...)
+    return list(range(len(s2) + 1))  # see (1), (3)
+
+
+def _levenshtein_di(c1, s2, i, d_prev, sameQ, cost):  # compute one new row
+    # given c1 = s1[[i]], s2, i, d_prev = D(i - 1, ...), compute D(i, ...)
+    # generator: yields D(i, 0), D(i, 1), ..., D(i, len(s2)) in that order
+    yield i  # start with D(i, 0) = i, see (2)
+    d_curr_prev_j = i  # d_curr_prev_j stores D(i, j - 1)
+
+    for j, c2 in _one_based(enumerate(s2)):  # c2 = s2[[j]]
+        cond = 0 if sameQ(c1, c2) else cost  # 0 when c1 and c2 match
+
+        d_curr_j = min(  # see (4)
+            d_prev[j - 1] + cond,  # D(i - 1, j - 1) + cond; substitution
+            d_curr_prev_j + 1,  # D(i, j - 1) + 1; insertion
+            d_prev[j] + 1,  # D(i - 1, j) + 1; deletion
+        )
+
+        yield d_curr_j
+        d_curr_prev_j = d_curr_j
+
+
+def _levenshtein(s1, s2, sameQ: Callable[..., bool]):
+    d_prev = _levenshtein_d0(s2)
+    for i, c1 in _one_based(enumerate(s1)):  # c1 = s1[[i]]
+        d_prev = list(_levenshtein_di(c1, s2, i, d_prev, sameQ, 1))
+    return d_prev[-1]
+
+
+def _damerau_levenshtein(s1, s2, sameQ: Callable[..., bool]):
+    # _damerau_levenshtein works like _levenshtein, except for one additional
+    # rule covering transposition:
+    #
+    # if i > 1 and j > 1 and a[i] == b[j - 1] and a[i - 1] == b[j] then
+    #     D(i, j) = minimum(D(i, j), D(i - 2, j - 2) + transposition_cost)
+
+    def row(d_prev_prev, d_prev, i, prev_c1, c1, cost):
+        # given c1 = s1[i], d_prev_prev = D(i - 2), d_prev = D(i - 1),
+        # prev_c1 = s1[[i - 1]], c1 = s1[[i]], compute D(i, ...)
+        for j, d_curr_j in enumerate(_levenshtein_di(c1, s2, i, d_prev, sameQ, cost)):
+            if i > 1 and j > 1:
+                if sameQ(c1, s2[j - 2]) and sameQ(prev_c1, s2[j - 1]):  # transposition?
+                    # i.e. if s1[[i]] = s2[[j-1]] and s1[[i-1]] = s2[[j]]
+                    d_curr_j = min(d_curr_j, d_prev_prev[j - 2] + cost)
+            yield d_curr_j
+
+    d_prev_prev = None
+    d_prev = _levenshtein_d0(s2)
+    for i, (prev_c1, c1) in _one_based(enumerate(_prev_curr(s1))):
+        d_curr = list(row(d_prev_prev, d_prev, i, prev_c1, c1, 1))
+        d_prev_prev = d_prev
+        d_prev = d_curr
+
+    return d_prev[-1]
+
+
+def _levenshtein_like_or_border_cases(s1, s2, sameQ: Callable[..., bool], compute):
+    if len(s1) == len(s2) and all(sameQ(c1, c2) for c1, c2 in zip(s1, s2)):
+        return 0
+
+    if len(s1) < len(s2):
+        s1, s2 = s2, s1
+
+    if len(s2) == 0:
+        return len(s1)
+
+    return compute(s1, s2, sameQ)
+
+
+class _StringDistance(Builtin):  # shared apply(); subclasses supply _distance()
+    options = {"IgnoreCase": "False"}
+
+    def apply(self, a, b, evaluation, options):
+        "%(name)s[a_, b_, OptionsPattern[%(name)s]]"
+        if isinstance(a, String) and isinstance(b, String):
+            py_a = a.get_string_value()
+            py_b = b.get_string_value()
+            if options["System`IgnoreCase"] == SymbolTrue:
+                if hasattr(str, "casefold"):
+                    # compare per-character case-folded, NFKD-normalized forms
+                    def normalize(c):
+                        return unicodedata.normalize("NFKD", c.casefold())
+
+                    py_a = [normalize(c) for c in py_a]
+                    py_b = [normalize(c) for c in py_b]
+                else:  # python2, PyPy
+                    py_a = py_a.lower()
+                    py_b = py_b.lower()
+            return Integer(self._distance(py_a, py_b, lambda u, v: u == v))
+        elif a.get_head_name() == "System`List" and b.get_head_name() == "System`List":
+            return Integer(self._distance(a.leaves, b.leaves, lambda u, v: u.sameQ(v)))
+        # Unsupported arguments: return None so the call is left unevaluated as-is
+        return None  # not Expression("EditDistance", ...): wrong head for subclasses
+
+
+class DamerauLevenshteinDistance(_StringDistance):
+    """
+    <dl>
+    <dt>'DamerauLevenshteinDistance[$a$, $b$]'
+        <dd>returns the Damerau-Levenshtein distance of $a$ and $b$, which is defined as the minimum number of
+        transpositions, insertions, deletions and substitutions needed to transform one into the other.
+        In contrast to EditDistance, DamerauLevenshteinDistance counts transposition of adjacent items (e.g.
+        "ab" into "ba") as one operation of change.
+    </dl>
+
+    >> DamerauLevenshteinDistance["kitten", "kitchen"]
+     = 2
+
+    >> DamerauLevenshteinDistance["abc", "ac"]
+     = 1
+
+    >> DamerauLevenshteinDistance["abc", "acb"]
+     = 1
+
+    >> DamerauLevenshteinDistance["azbc", "abxyc"]
+     = 3
+
+    The IgnoreCase option makes DamerauLevenshteinDistance ignore the case of letters:
+    >> DamerauLevenshteinDistance["time", "Thyme"]
+     = 3
+
+    >> DamerauLevenshteinDistance["time", "Thyme", IgnoreCase -> True]
+     = 2
+
+    DamerauLevenshteinDistance also works on lists:
+    >> DamerauLevenshteinDistance[{1, E, 2, Pi}, {1, E, Pi, 2}]
+     = 1
+    """
+
+    def _distance(self, s1, s2, sameQ: Callable[..., bool]):
+        return _levenshtein_like_or_border_cases(s1, s2, sameQ, _damerau_levenshtein)
+
+class EditDistance(_StringDistance):
+    """
+    <dl>
+    <dt>'EditDistance[$a$, $b$]'
+        <dd>returns the Levenshtein distance of $a$ and $b$, which is defined as the minimum number of
+        insertions, deletions and substitutions on the constituents of $a$ and $b$ needed to transform
+        one into the other.
+    </dl>
+
+    >> EditDistance["kitten", "kitchen"]
+     = 2
+
+    >> EditDistance["abc", "ac"]
+     = 1
+
+    >> EditDistance["abc", "acb"]
+     = 2
+
+    >> EditDistance["azbc", "abxyc"]
+     = 3
+
+    The IgnoreCase option makes EditDistance ignore the case of letters:
+    >> EditDistance["time", "Thyme"]
+     = 3
+
+    >> EditDistance["time", "Thyme", IgnoreCase -> True]
+     = 2
+
+    EditDistance also works on lists:
+    >> EditDistance[{1, E, 2, Pi}, {1, E, Pi, 2}]
+     = 2
+    """
+
+    def _distance(self, s1, s2, sameQ: Callable[..., bool]):
+        return _levenshtein_like_or_border_cases(s1, s2, sameQ, _levenshtein)
+
+
+class HammingDistance(Builtin):
+    """
+    <dl>
+    <dt>'HammingDistance[$u$, $v$]'
+      <dd>returns the Hamming distance between $u$ and $v$, i.e. the number of different elements.
+      $u$ and $v$ may be lists or strings.
+    </dl>
+
+    >> HammingDistance[{1, 0, 1, 0}, {1, 0, 0, 1}]
+    = 2
+
+    >> HammingDistance["time", "dime"]
+    = 1
+
+    >> HammingDistance["TIME", "dime", IgnoreCase -> True]
+    = 1
+    """
+
+    messages = {
+        "idim": "`1` and `2` must be of same length.",
+    }
+
+    options = {
+        "IgnoreCase": "False",
+    }
+
+    @staticmethod
+    def _compute(u, v, sameQ, evaluation):
+        if len(u) != len(v):
+            evaluation.message("HammingDistance", "idim", u, v)
+            return None
+        else:
+            return Integer(sum(0 if sameQ(x, y) else 1 for x, y in zip(u, v)))
+
+    def apply_list(self, u, v, evaluation):
+        "HammingDistance[u_List, v_List]"
+        return HammingDistance._compute(
+            u.leaves, v.leaves, lambda x, y: x.sameQ(y), evaluation
+        )
+
+    def apply_string(self, u, v, evaluation, options):
+        "HammingDistance[u_String, v_String, OptionsPattern[HammingDistance]]"
+        ignore_case = self.get_option(options, "IgnoreCase", evaluation)
+        py_u = u.get_string_value()
+        py_v = v.get_string_value()
+        if ignore_case and ignore_case.is_true():
+            py_u = py_u.lower()
+            py_v = py_v.lower()
+        return HammingDistance._compute(py_u, py_v, lambda x, y: x == y, evaluation)
diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py
index 227c6c963..db12c03fc 100644
--- a/mathics/builtin/strings.py
+++ b/mathics/builtin/strings.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Unsorted Strings and Characters
+Strings and Characters - Miscellaneous
 """
 
 import io
@@ -8,7 +8,7 @@
 import unicodedata
 from binascii import hexlify, unhexlify
 from heapq import heappush, heappop
-from typing import Any, Callable, List
+from typing import Any, List
 
 from mathics.version import __version__  # noqa used in loading to check consistency.
 
@@ -1052,259 +1052,6 @@ def test(self, expr):
         return isinstance(expr, String)
 
 
-class HammingDistance(Builtin):
-    """
-    <dl>
-    <dt>'HammingDistance[$u$, $v$]'
-      <dd>returns the Hamming distance between $u$ and $v$, i.e. the number of different elements.
-      $u$ and $v$ may be lists or strings.
-    </dl>
-
-    >> HammingDistance[{1, 0, 1, 0}, {1, 0, 0, 1}]
-    = 2
-
-    >> HammingDistance["time", "dime"]
-    = 1
-
-    >> HammingDistance["TIME", "dime", IgnoreCase -> True]
-    = 1
-    """
-
-    messages = {
-        "idim": "`1` and `2` must be of same length.",
-    }
-
-    options = {
-        "IgnoreCase": "False",
-    }
-
-    @staticmethod
-    def _compute(u, v, sameQ, evaluation):
-        if len(u) != len(v):
-            evaluation.message("HammingDistance", "idim", u, v)
-            return None
-        else:
-            return Integer(sum(0 if sameQ(x, y) else 1 for x, y in zip(u, v)))
-
-    def apply_list(self, u, v, evaluation):
-        "HammingDistance[u_List, v_List]"
-        return HammingDistance._compute(
-            u.leaves, v.leaves, lambda x, y: x.sameQ(y), evaluation
-        )
-
-    def apply_string(self, u, v, evaluation, options):
-        "HammingDistance[u_String, v_String, OptionsPattern[HammingDistance]]"
-        ignore_case = self.get_option(options, "IgnoreCase", evaluation)
-        py_u = u.get_string_value()
-        py_v = v.get_string_value()
-        if ignore_case and ignore_case.is_true():
-            py_u = py_u.lower()
-            py_v = py_v.lower()
-        return HammingDistance._compute(py_u, py_v, lambda x, y: x == y, evaluation)
-
-
-class _StringDistance(Builtin):
-    options = {"IgnoreCase": "False"}
-
-    def apply(self, a, b, evaluation, options):
-        "%(name)s[a_, b_, OptionsPattern[%(name)s]]"
-        if isinstance(a, String) and isinstance(b, String):
-            py_a = a.get_string_value()
-            py_b = b.get_string_value()
-            if options["System`IgnoreCase"] == SymbolTrue:
-                if hasattr(str, "casefold"):
-
-                    def normalize(c):
-                        return unicodedata.normalize("NFKD", c.casefold())
-
-                    py_a = [normalize(c) for c in py_a]
-                    py_b = [normalize(c) for c in py_b]
-                else:  # python2, PyPy
-                    py_a = py_a.lower()
-                    py_b = py_b.lower()
-            return Integer(self._distance(py_a, py_b, lambda u, v: u == v))
-        elif a.get_head_name() == "System`List" and b.get_head_name() == "System`List":
-            return Integer(self._distance(a.leaves, b.leaves, lambda u, v: u.sameQ(v)))
-        else:
-            return Expression("EditDistance", a, b)
-
-
-# Levenshtein's algorithm is defined by the following construction:
-# (adapted from https://de.wikipedia.org/wiki/Levenshtein-Distanz)
-#
-# given two strings s1, s2, we build a matrix D sized (len(s1) + 1,
-# len(s2) + 1) and fill it using the following rules:
-#
-# (1) D(0, 0) = 0
-# (2) D(i, 0) = i, 1 <= i <= len(s1)
-# (3) D(0, j) = j, 1 <= j <= len(s2)
-# (4) D(i, j) = minimum of
-#     D(i - 1, j - 1) + 0 if s1(j) = s2(j)
-#     D(i - 1, j - 1) + 1 (substitution)
-#     D(i, j - 1) + 1     (insertion)
-#     D(i - 1, j) + 1     (deletion)
-#
-# The computed distance will be in D(len(s1) + 1, len(s2) + 1).
-#
-# note: double brackets indicate 1-based indices below, e.g. s1[[1]]
-
-
-def _one_based(l):  # makes an enumerated generator 1-based
-    return ((i + 1, x) for i, x in l)
-
-
-def _prev_curr(l):  # yields pairs of (x[i - 1], x[i]) for i in 1, 2, ...
-    prev = None
-    for curr in l:
-        yield prev, curr
-        prev = curr
-
-
-def _levenshtein_d0(s2):  # compute D(0, ...)
-    return list(range(len(s2) + 1))  # see (1), (3)
-
-
-def _levenshtein_di(c1, s2, i, d_prev, sameQ, cost):  # compute one new row
-    # given c1 = s1[i], s2, i, d_prev = D(i - 1, ...), compute D(i, ...)
-
-    yield i  # start with D(i, 0) = i, see (2)
-    d_curr_prev_j = i  # d_curr_prev_j stores D(i, j - 1)
-
-    for j, c2 in _one_based(enumerate(s2)):  # c2 = s2[[j]]
-        cond = 0 if sameQ(c1, c2) else cost
-
-        d_curr_j = min(  # see (4)
-            d_prev[j - 1] + cond,  # D(i - 1, j - 1) + cond; substitution
-            d_curr_prev_j + 1,  # D(i, j - 1) + 1; insertion
-            d_prev[j] + 1,
-        )  # D(i - 1, j) + 1; deletion
-
-        yield d_curr_j
-        d_curr_prev_j = d_curr_j
-
-
-def _levenshtein(s1, s2, sameQ: Callable[..., bool]):
-    d_prev = _levenshtein_d0(s2)
-    for i, c1 in _one_based(enumerate(s1)):  # c1 = s1[[i]]
-        d_prev = list(_levenshtein_di(c1, s2, i, d_prev, sameQ, 1))
-    return d_prev[-1]
-
-
-def _damerau_levenshtein(s1, s2, sameQ: Callable[..., bool]):
-    # _damerau_levenshtein works like _levenshtein, except for one additional
-    # rule covering transposition:
-    #
-    # if i > 1 and j > 1 and a[i] == b[j - 1] and a[i - 1] == b[j] then
-    #     D(i, j) = minimum(D(i, j), D(i - 2, j - 2) + transposition_cost)
-
-    def row(d_prev_prev, d_prev, i, prev_c1, c1, cost):
-        # given c1 = s1[i], d_prev_prev = D(i - 2), d_prev = D(i - 1),
-        # prev_c1 = s1[[i - 1]], c1 = s1[[i]], compute D(i, ...)
-        for j, d_curr_j in enumerate(_levenshtein_di(c1, s2, i, d_prev, sameQ, cost)):
-            if i > 1 and j > 1:
-                if sameQ(c1, s2[j - 2]) and sameQ(prev_c1, s2[j - 1]):  # transposition?
-                    # i.e. if s1[[i]] = s2[[j-1]] and s1[[i-1]] = s2[[j]]
-                    d_curr_j = min(d_curr_j, d_prev_prev[j - 2] + cost)
-            yield d_curr_j
-
-    d_prev_prev = None
-    d_prev = _levenshtein_d0(s2)
-    for i, (prev_c1, c1) in _one_based(enumerate(_prev_curr(s1))):
-        d_curr = list(row(d_prev_prev, d_prev, i, prev_c1, c1, 1))
-        d_prev_prev = d_prev
-        d_prev = d_curr
-
-    return d_prev[-1]
-
-
-def _levenshtein_like_or_border_cases(s1, s2, sameQ: Callable[..., bool], compute):
-    if len(s1) == len(s2) and all(sameQ(c1, c2) for c1, c2 in zip(s1, s2)):
-        return 0
-
-    if len(s1) < len(s2):
-        s1, s2 = s2, s1
-
-    if len(s2) == 0:
-        return len(s1)
-
-    return compute(s1, s2, sameQ)
-
-
-class EditDistance(_StringDistance):
-    """
-    <dl>
-    <dt>'EditDistance[$a$, $b$]'
-        <dd>returns the Levenshtein distance of $a$ and $b$, which is defined as the minimum number of
-        insertions, deletions and substitutions on the constituents of $a$ and $b$ needed to transform
-        one into the other.
-    </dl>
-
-    >> EditDistance["kitten", "kitchen"]
-     = 2
-
-    >> EditDistance["abc", "ac"]
-     = 1
-
-    >> EditDistance["abc", "acb"]
-     = 2
-
-    >> EditDistance["azbc", "abxyc"]
-     = 3
-
-    The IgnoreCase option makes EditDistance ignore the case of letters:
-    >> EditDistance["time", "Thyme"]
-     = 3
-
-    >> EditDistance["time", "Thyme", IgnoreCase -> True]
-     = 2
-
-    EditDistance also works on lists:
-    >> EditDistance[{1, E, 2, Pi}, {1, E, Pi, 2}]
-     = 2
-    """
-
-    def _distance(self, s1, s2, sameQ: Callable[..., bool]):
-        return _levenshtein_like_or_border_cases(s1, s2, sameQ, _levenshtein)
-
-
-class DamerauLevenshteinDistance(_StringDistance):
-    """
-    <dl>
-    <dt>'DamerauLevenshteinDistance[$a$, $b$]'
-        <dd>returns the Damerau-Levenshtein distance of $a$ and $b$, which is defined as the minimum number of
-        transpositions, insertions, deletions and substitutions needed to transform one into the other.
-        In contrast to EditDistance, DamerauLevenshteinDistance counts transposition of adjacent items (e.g.
-        "ab" into "ba") as one operation of change.
-    </dl>
-
-    >> DamerauLevenshteinDistance["kitten", "kitchen"]
-     = 2
-
-    >> DamerauLevenshteinDistance["abc", "ac"]
-     = 1
-
-    >> DamerauLevenshteinDistance["abc", "acb"]
-     = 1
-
-    >> DamerauLevenshteinDistance["azbc", "abxyc"]
-     = 3
-
-    The IgnoreCase option makes DamerauLevenshteinDistance ignore the case of letters:
-    >> DamerauLevenshteinDistance["time", "Thyme"]
-     = 3
-
-    >> DamerauLevenshteinDistance["time", "Thyme", IgnoreCase -> True]
-     = 2
-
-    DamerauLevenshteinDistance also works on lists:
-    >> DamerauLevenshteinDistance[{1, E, 2, Pi}, {1, E, Pi, 2}]
-     = 1
-    """
-
-    def _distance(self, s1, s2, sameQ: Callable[..., bool]):
-        return _levenshtein_like_or_border_cases(s1, s2, sameQ, _damerau_levenshtein)
-
-
 class RemoveDiacritics(Builtin):
     """
     <dl>

From 58d1882bfb1eb52909c76bb98c85f01902fe145b Mon Sep 17 00:00:00 2001
From: autoblack <rocky@users.noreply.github.com>
Date: Wed, 30 Jun 2021 02:31:05 +0000
Subject: [PATCH 6/7] fixup: Format Python code with Black

---
 mathics/builtin/distance/stringdata.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mathics/builtin/distance/stringdata.py b/mathics/builtin/distance/stringdata.py
index b4ac85ee7..d65c874ef 100644
--- a/mathics/builtin/distance/stringdata.py
+++ b/mathics/builtin/distance/stringdata.py
@@ -38,6 +38,7 @@
 #
 # note: double brackets indicate 1-based indices below, e.g. s1[[1]]
 
+
 def _one_based(l):  # makes an enumerated generator 1-based
     return ((i + 1, x) for i, x in l)
 
@@ -182,6 +183,7 @@ class DamerauLevenshteinDistance(_StringDistance):
     def _distance(self, s1, s2, sameQ: Callable[..., bool]):
         return _levenshtein_like_or_border_cases(s1, s2, sameQ, _damerau_levenshtein)
 
+
 class EditDistance(_StringDistance):
     """
     <dl>

From 2965615c3ea7aba8ace471fe4cd2d0b95d6a7773 Mon Sep 17 00:00:00 2001
From: rocky <rb@dustyfeet.com>
Date: Tue, 29 Jun 2021 23:10:12 -0400
Subject: [PATCH 7/7] Fix some of the import bugs introduced

---
 mathics/builtin/colors/color_operations.py |   5 +-
 mathics/builtin/colors/named_colors.py     |   3 +-
 mathics/builtin/string/charcodes.py        |   3 +-
 mathics/builtin/string/operations.py       |   6 +-
 mathics/builtin/string/patterns.py         | 124 ++++++++++++++++++---
 mathics/builtin/strings.py                 |  86 --------------
 setup.py                                   |   1 +
 7 files changed, 119 insertions(+), 109 deletions(-)

diff --git a/mathics/builtin/colors/color_operations.py b/mathics/builtin/colors/color_operations.py
index aaf874044..94580a119 100644
--- a/mathics/builtin/colors/color_operations.py
+++ b/mathics/builtin/colors/color_operations.py
@@ -1,5 +1,8 @@
 # -*- coding: utf-8 -*-
-"""Color Operations"""
+"""Color Operations
+
+Functions for manipulating colors and color images.
+"""
 
 from mathics.version import __version__  # noqa used in loading to check consistency.
 
diff --git a/mathics/builtin/colors/named_colors.py b/mathics/builtin/colors/named_colors.py
index 518f93024..5d8cc3191 100644
--- a/mathics/builtin/colors/named_colors.py
+++ b/mathics/builtin/colors/named_colors.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 """Named Colors
 
-Mathics has definitions for the most common color names which can be
-used in a graphics or style specification.
+Mathics has definitions for the most common color names which can be used in a graphics or style specification.
 """
 
 from mathics.builtin.base import Builtin
diff --git a/mathics/builtin/string/charcodes.py b/mathics/builtin/string/charcodes.py
index be6d1ec49..d62150d49 100644
--- a/mathics/builtin/string/charcodes.py
+++ b/mathics/builtin/string/charcodes.py
@@ -3,6 +3,7 @@
 Character Codes
 """
 
+import sys
 from mathics.version import __version__  # noqa used in loading to check consistency.
 
 from mathics.builtin.base import Builtin
@@ -15,7 +16,7 @@
     SymbolList,
 )
 
-from mathics.builtin.strings import _encodings, to_python_encoding
+from mathics.builtin.strings import to_python_encoding
 
 
 def pack_bytes(codes):
diff --git a/mathics/builtin/string/operations.py b/mathics/builtin/string/operations.py
index e67499c80..0b9b6bb87 100644
--- a/mathics/builtin/string/operations.py
+++ b/mathics/builtin/string/operations.py
@@ -5,9 +5,6 @@
 """
 
 import re
-from sys import version_info
-from binascii import hexlify, unhexlify
-from heapq import heappush, heappop
 
 from mathics.version import __version__  # noqa used in loading to check consistency.
 
@@ -30,10 +27,9 @@
 from mathics.builtin.lists import python_seq, convert_seq
 from mathics.builtin.strings import (
     _StringFind,
-    _decode_pname,
-    _encode_pname,
     _evaluate_match,
     _parallel_match,
+    mathics_split,
     to_regex,
 )
 
diff --git a/mathics/builtin/string/patterns.py b/mathics/builtin/string/patterns.py
index 7a96342e8..5a7b811e5 100644
--- a/mathics/builtin/string/patterns.py
+++ b/mathics/builtin/string/patterns.py
@@ -12,13 +12,18 @@
 from mathics.core.expression import (
     Expression,
     Integer1,
+    String,
     SymbolFalse,
+    SymbolList,
     SymbolTrue,
 )
 
 
 from mathics.builtin.strings import (
     _StringFind,
+    _evaluate_match,
+    _pattern_search,
+    _parallel_match,
     anchor_pattern,
     to_regex,
 )
@@ -27,7 +32,7 @@
 class DigitCharacter(Builtin):
     """
     <dl>
-    <dt>'DigitCharacter'
+      <dt>'DigitCharacter'
       <dd>represents the digits 0-9.
     </dl>
 
@@ -46,10 +51,47 @@ class DigitCharacter(Builtin):
     """
 
 
+class EndOfLine(Builtin):
+    r"""
+    <dl>
+    <dt>'EndOfLine'
+      <dd>represents the end of a line in a string.
+    </dl>
+
+    >> StringReplace["aba\nbba\na\nab", "a" ~~ EndOfLine -> "c"]
+     = abc
+     . bbc
+     . c
+     . ab
+
+    >> StringSplit["abc\ndef\nhij", EndOfLine]
+     = {abc,
+     . def,
+     . hij}
+    """
+
+
+class EndOfString(Builtin):
+    r"""
+    <dl>
+    <dt>'EndOfString'
+      <dd>represents the end of a string.
+    </dl>
+
+    Test whether strings end with "e":
+    >> StringMatchQ[#, __ ~~ "e" ~~ EndOfString] &/@ {"apple", "banana", "artichoke"}
+     = {True, False, True}
+
+    >> StringReplace["aab\nabb", "b" ~~ EndOfString -> "c"]
+     = aab
+     . abc
+    """
+
+
 class LetterCharacter(Builtin):
     """
     <dl>
-    <dt>'LetterCharacter'
+      <dt>'LetterCharacter'
       <dd>represents letters.
     </dl>
 
@@ -62,19 +104,60 @@ class LetterCharacter(Builtin):
     """
 
 
+class StartOfLine(Builtin):
+    r"""
+    <dl>
+    <dt>'StartOfLine'
+      <dd>represents the start of a line in a string.
+    </dl>
+
+    >> StringReplace["aba\nbba\na\nab", StartOfLine ~~ "a" -> "c"]
+     = cba
+     . bba
+     . c
+     . cb
+
+    >> StringSplit["abc\ndef\nhij", StartOfLine]
+     = {abc
+     . , def
+     . , hij}
+    """
+
+
+class StartOfString(Builtin):
+    r"""
+    <dl>
+    <dt>'StartOfString'
+      <dd>represents the start of a string.
+    </dl>
+
+    Test whether strings start with "a":
+    >> StringMatchQ[#, StartOfString ~~ "a" ~~ __] &/@ {"apple", "banana", "artichoke"}
+     = {True, False, True}
+
+    >> StringReplace["aba\nabb", StartOfString ~~ "a" -> "c"]
+     = cba
+     . abb
+    """
+
+
 class StringCases(_StringFind):
     """
     <dl>
-    <dt>'StringCases["$string$", $pattern$]'
-        <dd>gives all occurences of $pattern$ in $string$.
-    <dt>'StringReplace["$string$", $pattern$ -> $form$]'
-        <dd>gives all instances of $form$ that stem from occurences of $pattern$ in $string$.
-    <dt>'StringCases["$string$", {$pattern1$, $pattern2$, ...}]'
-        <dd>gives all occurences of $pattern1$, $pattern2$, ....
-    <dt>'StringReplace["$string$", $pattern$, $n$]'
-        <dd>gives only the first $n$ occurences.
-    <dt>'StringReplace[{"$string1$", "$string2$", ...}, $pattern$]'
-        <dd>gives occurences in $string1$, $string2$, ...
+      <dt>'StringCases["$string$", $pattern$]'
+      <dd>gives all occurrences of $pattern$ in $string$.
+
+      <dt>'StringCases["$string$", $pattern$ -> $form$]'
+      <dd>gives all instances of $form$ that stem from occurrences of $pattern$ in $string$.
+
+      <dt>'StringCases["$string$", {$pattern1$, $pattern2$, ...}]'
+      <dd>gives all occurrences of $pattern1$, $pattern2$, ....
+
+      <dt>'StringCases["$string$", $pattern$, $n$]'
+      <dd>gives only the first $n$ occurrences.
+
+      <dt>'StringCases[{"$string1$", "$string2$", ...}, $pattern$]'
+      <dd>gives occurrences in $string1$, $string2$, ...
     </dl>
 
     >> StringCases["axbaxxb", "a" ~~ x_ ~~ "b"]
@@ -361,7 +444,7 @@ def apply(self, string, patt, evaluation, options):
 class WhitespaceCharacter(Builtin):
     r"""
     <dl>
-    <dt>'WhitespaceCharacter'
+      <dt>'WhitespaceCharacter'
       <dd>represents a single whitespace character.
     </dl>
 
@@ -379,10 +462,23 @@ class WhitespaceCharacter(Builtin):
     """
 
 
+# NOTE: this class only carries documentation; the matching behavior appears to be implemented in strings.to_regex().
+class WordBoundary(Builtin):
+    """
+    <dl>
+      <dt>'WordBoundary'
+      <dd>represents the boundary between words.
+    </dl>
+
+    >> StringReplace["apple banana orange artichoke", "e" ~~ WordBoundary -> "E"]
+     = applE banana orangE artichokE
+    """
+
+
 class WordCharacter(Builtin):
     r"""
     <dl>
-    <dt>'WordCharacter'
+      <dt>'WordCharacter'
       <dd>represents a single letter or digit character.
     </dl>
 
diff --git a/mathics/builtin/strings.py b/mathics/builtin/strings.py
index db12c03fc..8458477f5 100644
--- a/mathics/builtin/strings.py
+++ b/mathics/builtin/strings.py
@@ -414,92 +414,6 @@ class Whitespace(Builtin):
     """
 
 
-class StartOfString(Builtin):
-    r"""
-    <dl>
-    <dt>'StartOfString'
-      <dd>represents the start of a string.
-    </dl>
-
-    Test whether strings start with "a":
-    >> StringMatchQ[#, StartOfString ~~ "a" ~~ __] &/@ {"apple", "banana", "artichoke"}
-     = {True, False, True}
-
-    >> StringReplace["aba\nabb", StartOfString ~~ "a" -> "c"]
-     = cba
-     . abb
-    """
-
-
-class EndOfString(Builtin):
-    r"""
-    <dl>
-    <dt>'EndOfString'
-      <dd>represents the end of a string.
-    </dl>
-
-    Test whether strings end with "e":
-    >> StringMatchQ[#, __ ~~ "e" ~~ EndOfString] &/@ {"apple", "banana", "artichoke"}
-     = {True, False, True}
-
-    >> StringReplace["aab\nabb", "b" ~~ EndOfString -> "c"]
-     = aab
-     . abc
-    """
-
-
-class StartOfLine(Builtin):
-    r"""
-    <dl>
-    <dt>'StartOfString'
-      <dd>represents the start of a line in a string.
-    </dl>
-
-    >> StringReplace["aba\nbba\na\nab", StartOfLine ~~ "a" -> "c"]
-     = cba
-     . bba
-     . c
-     . cb
-
-    >> StringSplit["abc\ndef\nhij", StartOfLine]
-     = {abc
-     . , def
-     . , hij}
-    """
-
-
-class EndOfLine(Builtin):
-    r"""
-    <dl>
-    <dt>'EndOfString'
-      <dd>represents the end of a line in a string.
-    </dl>
-
-    >> StringReplace["aba\nbba\na\nab", "a" ~~ EndOfLine -> "c"]
-     = abc
-     . bbc
-     . c
-     . ab
-
-    >> StringSplit["abc\ndef\nhij", EndOfLine]
-     = {abc,
-     . def,
-     . hij}
-    """
-
-
-class WordBoundary(Builtin):
-    """
-    <dl>
-    <dt>'WordBoundary'
-      <dd>represents the boundary between words.
-    </dl>
-
-    >> StringReplace["apple banana orange artichoke", "e" ~~ WordBoundary -> "E"]
-     = applE banana orangE artichokE
-    """
-
-
 # FIXME: Generalize string.lower() and ord()
 def letter_number(chars: List[str], start_ord) -> List["Integer"]:
     # Note caller has verified that everything isalpha() and
diff --git a/setup.py b/setup.py
index 3e2a18923..5c0c80989 100644
--- a/setup.py
+++ b/setup.py
@@ -133,6 +133,7 @@ def subdirs(root, file="*.*", depth=10):
         "mathics.builtin.box",
         "mathics.builtin.colors",
         "mathics.builtin.compile",
+        "mathics.builtin.distance",
         "mathics.builtin.drawing",
         "mathics.builtin.fileformats",
         "mathics.builtin.files_io",