Copyright | (C) 2021 Koz Ross |
---|---|
License | Apache 2.0 |
Maintainer | Koz Ross <koz.ross@retro-freedom.nz> |
Stability | stable |
Portability | GHC only |
Safe Haskell | Trustworthy |
Language | Haskell2010 |
An implementation of ASCII strings.
This module is designed for qualified importing:
import qualified Text.Ascii as Ascii
See also: Wikipedia entry for ASCII
Synopsis
- data AsciiText
- empty :: AsciiText
- singleton :: AsciiChar -> AsciiText
- ascii :: QuasiQuoter
- cons :: AsciiChar -> AsciiText -> AsciiText
- snoc :: AsciiText -> AsciiChar -> AsciiText
- uncons :: AsciiText -> Maybe (AsciiChar, AsciiText)
- unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar)
- length :: AsciiText -> Int
- map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText
- intercalate :: AsciiText -> [AsciiText] -> AsciiText
- intersperse :: AsciiChar -> AsciiText -> AsciiText
- transpose :: [AsciiText] -> [AsciiText]
- reverse :: AsciiText -> AsciiText
- replace :: AsciiText -> AsciiText -> AsciiText -> AsciiText
- justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText
- justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText
- center :: Int -> AsciiChar -> AsciiText -> AsciiText
- foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a
- foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a
- foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a
- foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a
- concat :: [AsciiText] -> AsciiText
- concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText
- scanl :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiChar -> AsciiText -> AsciiText
- scanr :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiChar -> AsciiText -> AsciiText
- mapAccumL :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
- mapAccumR :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
- replicate :: Int -> AsciiText -> AsciiText
- unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText
- unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a)
- take :: Int -> AsciiText -> AsciiText
- takeEnd :: Int -> AsciiText -> AsciiText
- drop :: Int -> AsciiText -> AsciiText
- dropEnd :: Int -> AsciiText -> AsciiText
- takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
- takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
- dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
- dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
- dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
- strip :: AsciiText -> AsciiText
- stripStart :: AsciiText -> AsciiText
- stripEnd :: AsciiText -> AsciiText
- splitAt :: Int -> AsciiText -> (AsciiText, AsciiText)
- breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
- breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText)
- break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
- span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
- group :: AsciiText -> [AsciiText]
- groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText]
- inits :: AsciiText -> [AsciiText]
- tails :: AsciiText -> [AsciiText]
- splitOn :: AsciiText -> AsciiText -> [AsciiText]
- split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
- chunksOf :: Int -> AsciiText -> [AsciiText]
- lines :: AsciiText -> [AsciiText]
- unlines :: Foldable f => f AsciiText -> AsciiText
- words :: AsciiText -> [AsciiText]
- unwords :: [AsciiText] -> AsciiText
- stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText
- stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText
- stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText)
- commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText)
- filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
- breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)]
- find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar
- partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
- index :: AsciiText -> Int -> Maybe AsciiChar
- findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int
- count :: AsciiText -> AsciiText -> Int
- zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)]
- zipWith :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText
- fromText :: Text -> Maybe AsciiText
- fromByteString :: ByteString -> Maybe AsciiText
- toText :: AsciiText -> Text
- toByteString :: AsciiText -> ByteString
- textWise :: Prism' Text AsciiText
- byteStringWise :: Prism' ByteString AsciiText
- packedChars :: Iso' [AsciiChar] AsciiText
- chars :: IxTraversal' Int64 AsciiText AsciiChar
- packedBytes :: Prism' [Word8] AsciiText
- bytes :: IxFold Int64 AsciiText Word8
Type
A string of ASCII characters, represented as a packed byte array.
Since: 1.0.0
Instances
Creation
singleton :: AsciiChar -> AsciiText Source #
A text consisting of a single ASCII character.
>>>
singleton [char| 'w' |]
"w"
Complexity: \(\Theta(1)\)
Since: 1.0.0
ascii :: QuasiQuoter Source #
Allows constructing ASCII strings from literals, whose correctness is checked by the compiler.
Currently accepts literal syntax similar to the Haskell parser, with escape sequences preceded by '\'. In particular, this includes the double quote (see the example below).
>>>
[ascii| "\"Nyan!\", said the catboy." |]
"\"Nyan!\", said the catboy."
Since: 1.0.0
Basic interface
cons :: AsciiChar -> AsciiText -> AsciiText Source #
Adds a character to the front of a text. This requires copying, which gives its complexity.
>>>
cons [char| 'n' |] [ascii| "eko" |]
"neko"
Complexity: \(\Theta(n)\)
Since: 1.0.0
snoc :: AsciiText -> AsciiChar -> AsciiText Source #
Adds a character to the back of a text. This requires copying, which gives its complexity.
>>>
snoc [ascii| "nek" |] [char| 'o' |]
"neko"
Complexity: \(\Theta(n)\)
Since: 1.0.0
length :: AsciiText -> Int Source #
The number of characters (and, since this is ASCII, bytes) in the text.
>>>
length . singleton $ [char| 'w' |]
1>>>
length [ascii| "nyan nyan" |]
9
Complexity: \(\Theta(1)\)
Since: 1.0.0
Transformations
map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText Source #
Copy, and apply the function to each element of, the text.
>>>
map (\c -> fromMaybe c . upcase $ c) [ascii| "nyan!" |]
"NYAN!"
Complexity: \(\Theta(n)\)
Since: 1.0.0
intercalate :: AsciiText -> [AsciiText] -> AsciiText Source #
Takes a text and a list of texts, and concatenates the list after interspersing the first argument between each element of the list.
>>>
intercalate [ascii| " ~ " |] []
"">>>
intercalate [ascii| " ~ " |] [[ascii| "nyan" |]]
"nyan">>>
intercalate [ascii| " ~ " |] . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan ~ nyan ~ nyan">>>
intercalate empty . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyannyannyan"
Complexity: \(\Theta(n)\)
Since: 1.0.0
intersperse :: AsciiChar -> AsciiText -> AsciiText Source #
Takes a character, and places it between the characters of a text.
>>>
intersperse [char| '~' |] empty
"">>>
intersperse [char| '~' |] . singleton $ [char| 'w' |]
"w">>>
intersperse [char| '~' |] [ascii| "nyan" |]
"n~y~a~n"
Complexity: \(\Theta(n)\)
Since: 1.0.0
transpose :: [AsciiText] -> [AsciiText] Source #
Transpose the rows and columns of the argument. This uses
transpose
internally, and thus, isn't very efficient.
>>>
transpose []
[]>>>
transpose [[ascii| "w" |]]
["w"]>>>
transpose [[ascii| "nyan" |]]
["n","y","a","n"]>>>
transpose . Prelude.replicate 3 $ [ascii| "nyan" |]
["nnn","yyy","aaa","nnn"]>>>
transpose [[ascii| "cat" |], [ascii| "boy" |], [ascii| "nyan" |]]
["cbn","aoy","tya","n"]
Complexity: \(\Theta(n)\)
Since: 1.0.0
reverse :: AsciiText -> AsciiText Source #
Reverse the text.
>>>
reverse empty
"">>>
reverse . singleton $ [char| 'w' |]
"w">>>
reverse [ascii| "catboy goes nyan" |]
"nayn seog yobtac"
Complexity: \(\Theta(n)\)
Since: 1.0.0
:: AsciiText |
|
-> AsciiText |
|
-> AsciiText |
|
-> AsciiText |
replace needle replacement haystack
, given a needle
of length \(n\) and
a haystack of length \(h\), replaces each non-overlapping occurrence of
needle
in haystack
with replacement
. If the needle
is empty, no
replacement will be performed. Equivalent to
.intercalate
replacement .
splitOn
needle $
haystack
>>>
replace empty [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
"catboy goes nyan nyan">>>
replace [ascii| "nyan" |] [ascii| "NYAN~" |] empty
"">>>
replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |]
"catboy goes NYAN~ NYAN~">>>
replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "nyanyan" |]
"NYAN~yan"
On complexity
This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:
- All ASCII symbols are equally likely to occur in both the needle and the haystack; and
- The needle has length at least two; and
- Both the needle and the haystack contain at least four unique symbols.
We fall back to split
for singleton needles, and there is no work to be
done on empty needles, which means the second assumption always holds.
Worst-case behaviour becomes more likely the more your input satisfies the following conditions:
- The needle and/or haystack use few unique symbols (less than four is the worst); or
- The haystack contains many instances of the second symbol of the needle which don't lead to full matches.
The analysis below also doesn't factor in the cost of performing the replacement, as this is (among other things) proportional to the number of matches of the needle (and thus is hard to quantify).
Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.
See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.
- Description and pseudocode
- "Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.
Since: 1.0.1
Justification
justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText Source #
justifyLeft n c t
produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
consisting of a copy of t
followed by (zero or more) copies
of c
.
>>>
justifyLeft (-100) [char| '~' |] [ascii| "nyan" |]
"nyan">>>
justifyLeft 4 [char| '~' |] [ascii| "nyan" |]
"nyan">>>
justifyLeft 10 [char| '~' |] [ascii| "nyan" |]
"nyan~~~~~~"
Complexity: \(\Theta(n)\)
Since: 1.0.1
justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText Source #
justifyRight n c t
produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
consisting of (zero or more) copies of c
followed by a copy of t
.
>>>
justifyRight (-100) [char| '~' |] [ascii| "nyan" |]
"nyan">>>
justifyRight 4 [char| '~' |] [ascii| "nyan" |]
"nyan">>>
justifyRight 10 [char| '~' |] [ascii| "nyan" |]
"~~~~~~nyan"
Complexity: \(\Theta(n)\)
Since: 1.0.1
center :: Int -> AsciiChar -> AsciiText -> AsciiText Source #
center n c t
produces a result of length \({\tt k } = \max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
consisting of:
- \(\lceil \frac{{\tt k} - {\tt length} \; {\tt t}}{2} \rceil\) copies of
c
; followed by - A copy of
t
; followed by - Zero or more copies of
c
This means that the centering is 'left-biased'. This mimicks the behaviour of the function of the same name in the text package, although that function's documenation does not describe this behaviour.
>>>
center (-100) [char| '~' |] [ascii| "nyan" |]
"nyan">>>
center 4 [char| '~' |] [ascii| "nyan" |]
"nyan">>>
center 5 [char| '~' |] [ascii| "nyan" |]
"~nyan">>>
center 6 [char| '~' |] [ascii| "nyan" |]
"~nyan~"
Complexity: \(\Theta(n)\)
Since: 1.0.1
Folds
foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a Source #
Left-associative fold of a text.
>>>
foldl (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ac)a)t)b)o)y)"
Complexity: \(\Theta(n)\)
Since: 1.0.0
foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a Source #
Left-associative fold of a text, strict in the accumulator.
>>>
foldl' (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ac)a)t)b)o)y)"
Complexity: \(\Theta(n)\)
Since: 1.0.0
foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a Source #
Right-associative fold of a text.
>>>
foldr (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ay)o)b)t)a)c)"
Complexity: \(\Theta(n)\)
Since: 1.0.0
foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a Source #
Right-associative fold of a text, strict in the accumulator.
>>>
foldr' (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |]
"f(f(f(f(f(f(ay)o)b)t)a)c)"
Complexity: \(\Theta(n)\)
Since: 1.0.0
Special folds
concat :: [AsciiText] -> AsciiText Source #
Concatenate a list of texts.
>>>
concat []
"">>>
concat [[ascii| "catboy" |]]
"catboy">>>
concat . Prelude.replicate 4 $ [ascii| "nyan" |]
"nyannyannyannyan"
Complexity: \(\Theta(n)\)
Since: 1.0.0
concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText Source #
Map a text-producing function over a text, then concatenate the results.
>>>
concatMap singleton empty
"">>>
concatMap singleton [ascii| "nyan" |]
"nyan">>>
concatMap (\c -> singleton c <> singleton c) [ascii| "nekomimi" |]
"nneekkoommiimmii"
Complexity: \(\Theta(n)\)
Since: 1.0.0
Construction
Scans
Accumulating maps
Generation and unfolding
replicate :: Int -> AsciiText -> AsciiText Source #
replicate n t
consists of t
repeated \(\max \{ 0, {\tt n } \}\) times.
>>>
replicate (-100) [ascii| "nyan" |]
"">>>
replicate 0 [ascii| "nyan" |]
"">>>
replicate 3 [ascii| "nyan" |]
"nyannyannyan"
Complexity: \(\Theta(n \cdot m)\)
Since: 1.0.1
unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText Source #
Similar to unfoldr
. The function parameter takes a seed value,
and produces either Nothing
(indicating that we're done) or Just
an
AsciiChar
and a new seed value. unfoldr
then, given a starting seed, will
repeatedly call the function parameter on successive seed values, returning
the resulting AsciiText
, based on the AsciiChar
s produced, in the same
order.
Complexity: \(\Theta(n)\)
Since: 1.0.0
unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a) Source #
Similar to unfoldr
, but also takes a maximum length parameter. The second
element of the result tuple will be Nothing
if we finished with the
function argument returning Nothing
, and Just
the final seed value if we
reached the maximum length before that happened.
Complexity: \(\Theta(n)\)
Since: 1.0.0
Substrings
Breaking strings
take :: Int -> AsciiText -> AsciiText Source #
take n t
returns the prefix of t
with length
\(\min \{ \max \{ 0, {\tt n}\}, {\tt length} \; {\tt t} \}\).
>>>
take (-100) [ascii| "catboy" |]
"">>>
take 0 [ascii| "catboy" |]
"">>>
take 4 [ascii| "catboy" |]
"catb">>>
take 1000 [ascii| "catboy" |]
"catboy"
Complexity: \(\Theta(1)\)
Since: 1.0.0
takeEnd :: Int -> AsciiText -> AsciiText Source #
takeEnd n t
returns the suffix of t
with length
\(\min \{ \max \{0, {\tt n} \}, {\tt length} \; {\tt t} \}\).
>>>
takeEnd (-100) [ascii| "catboy" |]
"">>>
takeEnd 0 [ascii| "catboy" |]
"">>>
takeEnd 4 [ascii| "catboy" |]
"tboy">>>
takeEnd 1000 [ascii| "catboy" |]
"catboy"
/Complexity:] \(\Theta(1)\)
Since: 1.0.1
drop :: Int -> AsciiText -> AsciiText Source #
drop n t
returns the suffix of t
with length
\(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\).
>>>
drop (-100) [ascii| "catboy" |]
"catboy">>>
drop 0 [ascii| "catboy" |]
"catboy">>>
drop 4 [ascii| "catboy" |]
"oy">>>
drop 1000 [ascii| "catboy" |]
""
Complexity: \(\Theta(1)\)
Since: 1.0.0
dropEnd :: Int -> AsciiText -> AsciiText Source #
dropEnd n t
returns the prefix of t
with length
\(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\).
>>>
dropEnd (-100) [ascii| "catboy" |]
"catboy">>>
dropEnd 0 [ascii| "catboy" |]
"catboy">>>
dropEnd 4 [ascii| "catboy" |]
"ca">>>
dropEnd 1000 [ascii| "catboy" |]
""
Complexity: \(\Theta(1)\)
Since: 1.0.1
takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #
takeWhile p t
returns the longest prefix of t
of characters that
satisfy p
.
>>>
takeWhile ((Just Lower ==) . caseOf) empty
"">>>
takeWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"catboy"
Complexity: \(\Theta(n)\)
Since: 1.0.0
dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #
dropWhile p t
returns the suffix remaining after
.takeWhile
p t
>>>
dropWhile ((Just Lower ==) . caseOf) empty
"">>>
dropWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
" goes nyan"
Complexity: \(\Theta(n)\)
Since: 1.0.0
dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #
dropWhileEnd p t
returns the prefix remaining after
.
Equivalent to takeWhileEnd
p t
.reverse
. dropWhile
p . reverse
>>>
dropWhileEnd ((Just Lower ==) . caseOf) empty
"">>>
dropWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
"catboy goes "
Complexity: \(\Theta(n)\)
Since: 1.0.0
dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #
dropAround p
is equivalent to
.dropWhile
p .
dropWhileEnd
p
>>>
dropAround ((Just Lower ==) . caseOf) empty
"">>>
dropAround ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |]
" goes "
Complexity: \(\Theta(n)\)
Since: 1.0.1
strip :: AsciiText -> AsciiText Source #
Remove the longest prefix and suffix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:
- TAB (0x09)
- LF (0x0a)
- VT (0x0b)
- FF (0x0c)
- CR (0x0d)
- Space (0x20)
>>>
strip empty
"">>>
strip [ascii| "catboy goes nyan" |]
"catboy goes nyan">>>
strip [ascii| "\n\n \tcatboy goes nyan" |]
"catboy goes nyan">>>
strip [ascii| "catboy goes nyan \t\t\n" |]
"catboy goes nyan">>>
strip [ascii| "\n\n \tcatboy goes nyan \t\t\n" |]
"catboy goes nyan"
Complexity: \(\Theta(n)\)
Since: 1.0.1
stripStart :: AsciiText -> AsciiText Source #
Remove the longest prefix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:
- TAB (0x09)
- LF (0x0a)
- VT (0x0b)
- FF (0x0c)
- CR (0x0d)
- Space (0x20)
>>>
stripStart empty
"">>>
stripStart [ascii| "catboy goes nyan" |]
"catboy goes nyan">>>
stripStart [ascii| "\n\n \tcatboy goes nyan" |]
"catboy goes nyan">>>
stripStart [ascii| "catboy goes nyan \t\t\n" |]
"catboy goes nyan \t\t\n">>>
stripStart [ascii| "\n\n \tcatboy goes nyan \t\t\n" |]
"catboy goes nyan \t\t\n"
Complexity: \(\Theta(n)\)
Since: 1.0.1
stripEnd :: AsciiText -> AsciiText Source #
Remove the longest suffix of the input comprised entirely of whitespace characters. We define a 'whitespace character' as any of the following:
- TAB (0x09)
- LF (0x0a)
- VT (0x0b)
- FF (0x0c)
- CR (0x0d)
- Space (0x20)
>>>
stripEnd empty
"">>>
stripEnd [ascii| "catboy goes nyan" |]
"catboy goes nyan">>>
stripEnd [ascii| "\n\n \tcatboy goes nyan" |]
"\n\n \tcatboy goes nyan">>>
stripEnd [ascii| "catboy goes nyan \t\t\n" |]
"catboy goes nyan">>>
stripEnd [ascii| "\n\n \tcatboy goes nyan \t\t\n" |]
"\n\n \tcatboy goes nyan"
Complexity: \(\Theta(n)\)
Since: 1.0.1
breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText) Source #
breakOn needle haystack
, given a needle
of length \(n\) and a
haystack
of length \(h\), attempts to find the first instance of needle
in haystack
. If successful, return a tuple consisting of:
- The prefix of
haystack
before the match; and - The rest of
haystack
, starting with the match.
If the needle is empty, this returns (
. If no match can
be found, this instead returns empty
, haystack)(haystack,
.empty
)
If you need to repeatedly split on the same needle, consider breakOnAll
, as
this will be more efficient due to only having to run the matching algorithm
once.
>>>
breakOn empty [ascii| "catboy goes nyan" |]
("","catboy goes nyan")>>>
breakOn [ascii| "nyan" |] empty
("","")>>>
breakOn [ascii| "goes" |] [ascii| "catboy goes nyan" |]
("catboy ","goes nyan")>>>
breakOn [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
("nyan nyan nyan","")
On complexity
This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:
- All ASCII symbols are equally likely to occur in both the needle and the haystack; and
- The needle has length at least two; and
- Both the needle and the haystack contain at least four unique symbols.
We fall back to split
for singleton needles, and there is no work to be
done on empty needles, which means the second assumption always holds.
Worst-case behaviour becomes more likely the more your input satisfies the following conditions:
- The needle and/or haystack use few unique symbols (less than four is the worst); or
- The haystack contains many instances of the second symbol of the needle which don't lead to full matches.
Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.
See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.
- Description and pseudocode
- "Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.
Since: 1.0.1
breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText) Source #
breakOnEnd needle haystack
, given a needle
of length \(n\) and a
haystack
of length \(h\), attempts to find the last instance of needle
in
haystack
. If successful, return a tuple consisting of:
- The prefix of
haystack
up to, and including, the match; and - The rest of
haystack
.
If the needle is empty, this returns (haystack,
. If no match can
be found, this instead returns empty
)(
.empty
, haystack)
This function is similar to breakOn
. If you need to repeatedly split on the
same needle, consider breakOnAll
, as this will be more efficient due to
only having to run the matching algorithm once.
>>>
breakOnEnd empty [ascii| "catboy goes nyan" |]
("catboy goes nyan","")>>>
breakOnEnd [ascii| "nyan" |] empty
("","")>>>
breakOnEnd [ascii| "goes" |] [ascii| "catboy goes nyan" |]
("catboy goes"," nyan")>>>
breakOnEnd [ascii| "catboy" |] [ascii| "nyan nyan nyan" |]
("","nyan nyan nyan")
On complexity
This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:
- All ASCII symbols are equally likely to occur in both the needle and the haystack; and
- The needle has length at least two; and
- Both the needle and the haystack contain at least four unique symbols.
We fall back to split
for singleton needles, and there is no work to be
done on empty needles, which means the second assumption always holds.
Worst-case behaviour becomes more likely the more your input satisfies the following conditions:
- The needle and/or haystack use few unique symbols (less than four is the worst); or
- The haystack contains many instances of the second symbol of the needle which don't lead to full matches.
Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.
See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.
- Description and pseudocode
- "Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.
Since: 1.0.1
group :: AsciiText -> [AsciiText] Source #
Separate a text into a list of texts such that:
- Their concatenation is equal to the original argument; and
- Equal adjacent characters in the original argument are in the same text in the result.
This is a specialized form of groupBy
, and is about 40% faster than
.groupBy
==
>>>
group empty
[]>>>
group . singleton $ [char| 'w' |]
["w"]>>>
group [ascii| "nyan" |]
["n","y","a","n"]>>>
group [ascii| "nyaaaan" |]
["n","y","aaaa","n"]
Complexity: \(\Theta(n)\)
Since: 1.0.0
groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText] Source #
Separate a text into a list of texts such that:
- Their concatenation is equal to the original argument; and
- Adjacent characters for which the function argument returns
True
are in the same text in the result.
group
is a special case for the function argument ==
; it is also about
40% faster.
>>>
groupBy (<) empty
[]>>>
groupBy (<) . singleton $ [char| 'w' |]
["w"]>>>
groupBy (<) [ascii| "catboy goes nyan" |]
["c","atboy"," goes"," nyan"]
Complexity: \(\Theta(n)\)
Since: 1.0.0
inits :: AsciiText -> [AsciiText] Source #
All prefixes of the argument, from shortest to longest.
>>>
inits empty
[""]>>>
inits . singleton $ [char| 'w' |]
["","w"]>>>
inits [ascii| "nyan" |]
["","n","ny","nya","nyan"]
Complexity: \(\Theta(n)\)
Since: 1.0.0
tails :: AsciiText -> [AsciiText] Source #
All suffixes of the argument, from shortest to longest.
>>>
tails empty
[""]>>>
tails . singleton $ [char| 'w' |]
["w",""]>>>
tails [ascii| "nyan" |]
["nyan","yan","an","n",""]
Complexity: \(\Theta(n)\)
Since: 1.0.0
Breaking into many substrings
splitOn :: AsciiText -> AsciiText -> [AsciiText] Source #
splitOn needle haystack
, given a needle
of length \(n\) and a haystack
of length \(h\), breaks haystack
into pieces, separated by needle
. Any
occurrences of needle
in haystack
are consumed.
>>>
splitOn empty [ascii| "catboy goes nyan and goes nyan" |]
["catboy goes nyan and goes nyan"]>>>
splitOn [ascii| "nyan" |] empty
[""]>>>
splitOn [ascii| "nyan" |] [ascii| "catboy goes nyan and goes nyan" |]
["catboy goes "," and goes ",""]>>>
splitOn [ascii| "nyan" |] [ascii| "nyan" |]
["",""]>>>
splitOn [ascii| "nyan" |] [ascii| "catboy" |]
["catboy"]
On complexity
This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:
- All ASCII symbols are equally likely to occur in both the needle and the haystack; and
- The needle has length at least two; and
- Both the needle and the haystack contain at least four unique symbols.
We fall back to split
for singleton needles, and there is no work to be
done on empty needles, which means the second assumption always holds.
Worst-case behaviour becomes more likely the more your input satisfies the following conditions:
- The needle and/or haystack use few unique symbols (less than four is the worst); or
- The haystack contains many instances of the second symbol of the needle which don't lead to full matches.
Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.
See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.
- Description and pseudocode
- "Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.
Since: 1.0.1
split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText] Source #
split p t
separates t
into components delimited by separators, for
which p
returns True
. The results do not contain the separators.
\(n\) adjacent separators result in \(n - 1\) empty components in the result.
>>>
split ([char| '~' |] ==) empty
[]>>>
split ([char| '~' |] ==) . singleton $ [char| '~' |]
["",""]>>>
split ([char| '~' |] ==) [ascii| "nyan" |]
["nyan"]>>>
split ([char| '~' |] ==) [ascii| "~nyan" |]
["","nyan"]>>>
split ([char| '~' |] ==) [ascii| "nyan~" |]
["nyan",""]>>>
split ([char| '~' |] ==) [ascii| "nyan~nyan"|]
["nyan","nyan"]>>>
split ([char| '~' |] ==) [ascii| "nyan~~nyan" |]
["nyan","","nyan"]>>>
split ([char| '~' |] ==) [ascii| "nyan~~~nyan" |]
["nyan","","","nyan"]
Complexity: \(\Theta(n)\)
Since: 1.0.0
chunksOf :: Int -> AsciiText -> [AsciiText] Source #
Splits a text into chunks of the specified length. Equivalent to repeatedly
take
ing the specified length until exhaustion. The last item in the result
may thus be shorter than requested.
For any n <= 0
and any t
, chunksOf n t
yields the empty list. This is
identical to the behaviour of the function of the same name in the text
package,
although it doesn't document this fact.
>>>
chunksOf (-100) [ascii| "I am a catboy" |]
[]>>>
chunksOf (-100) empty
[]>>>
chunksOf 0 [ascii| "I am a catboy" |]
[]>>>
chunksOf 0 empty
[]>>>
chunksOf 1 [ascii| "I am a catboy" |]
["I"," ","a","m"," ","a"," ","c","a","t","b","o","y"]>>>
chunksOf 1 empty
[]>>>
chunksOf 2 [ascii| "I am a catboy" |]
["I ","am"," a"," c","at","bo","y"]>>>
chunksOf 300 [ascii| "I am a catboy" |]
["I am a catboy"]
Complexity: \(\Theta(n)\)
Since: 1.0.1
Breaking into lines and words
lines :: AsciiText -> [AsciiText] Source #
Identical to the functions of the same name in the text package, and the Prelude. Specifically, separates the argument into pieces, with LF characters (0x0a) as separators. A single trailing LF is ignored. None of the final results contain LF.
We chose to follow the same semantics for this function as the text package
and the Prelude. This has some consequences,
which the documentation of both the text package and the Prelude does not
properly explain. We list them here - bear these in mind when using this
function, as well as unlines
:
- No platform-specific concept of a 'newline' is ever used by this function. Separation is done on LF, and only LF, regardless of platform. The documentation in both the text package and the Prelude confusingly refers to 'newline characters', which is a category error. We thus specify that LF is the character being split on, rather than mentioning 'newlines' in any way, shape or form.
is not the same asunlines
.
lines
. This is misleadingly described in the Prelude, which claims that (its version of)id
unlines
is 'an inverse operation' to (its version of)lines
. For a precise explanation of why this is the case, please see the documentation forunlines
.
is not the same aslines
. See the doctests below for a demonstration of how they differ.split
([char
| 'n' |]==
)
>>>
lines empty
[]>>>
split ([char| '\n' |] ==) empty
[]>>>
lines [ascii| "catboy goes nyan" |]
["catboy goes nyan"]>>>
split ([char| '\n' |] ==) [ascii| "catboy goes nyan" |]
["catboy goes nyan"]>>>
lines [ascii| "catboy goes nyan\n" |]
["catboy goes nyan"]>>>
split ([char| '\n' |] ==) [ascii| "catboy goes nyan\n" |]
["catboy goes nyan",""]>>>
lines [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
["","catboy","","","goes","","nyan",""]>>>
split ([char| '\n' |] ==) [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
["","catboy","","","goes","","nyan","",""]>>>
lines [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
["\r","catboy\r","goes\r","nyan\r"]>>>
split ([char| '\n' |] ==) [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
["\r","catboy\r","goes\r","nyan\r",""]
Complexity: \(\Theta(n)\)
See also: Wikipedia on newlines
Since: 1.0.1
unlines :: Foldable f => f AsciiText -> AsciiText Source #
Identical to the functions of the same name in the text
package
and the
Prelude.
Specifically, appends an LF character to each of the texts, then concatenates. Equivalent
to
.foldMap
(`snoc'
[char| '\n' |])
We chose to follow the same semantics for this function as the text package
and the Prelude. This has some consequences, which the documentation of both
the text package and the Prelude does not properly explain. We list them here
- bear these in mind when using this function, as well as lines
:
- No platform-specific concept of a 'newline' is ever used by this function. The documentation in both the text package and the Prelude confusing refer to appending a 'terminating newline', which is only a correct statement on platforms where a newline is LF. We thus specify that we append LF, rather than mentioning 'newlines' in any way, shape or form.
is not the same asunlines
.
lines
. This is misleadingly described in the Prelude, which claims that (its version of)id
unlines
is 'an inverse operation' to (its version of)lines
. See the doctests below for a demonstration of this.
>>>
unlines []
"">>>
unlines [[ascii| "nyan" |]]
"nyan\n">>>
unlines . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan\nnyan\nnyan\n">>>
unlines . lines $ [ascii| "catboy goes nyan" |]
"catboy goes nyan\n"
Complexity: \(\Theta(n)\)
See also: Wikipedia on newlines
Since: 1.0.1
words :: AsciiText -> [AsciiText] Source #
Identical to the functions of the same name in the text package and the Prelude. Specifically, separates the argument into pieces, with (non-empty sequences of) word separator characters as separators. A 'word separator character' is any of the following:
- TAB (0x09)
- LF (0x0a)
- VT (0x0b)
- FF (0x0c)
- CR (0x0d)
- Space (0x20)
None of the final results contain any word separator characters. Any sequence of leading, or trailing, word separator characters will be ignored.
We chose to follow the same semantics for this function as the text package
and the Prelude. This has the consequence that
is
not the same as unwords
.
words
id
, although the documentation for the Prelude
confusingly describes (its version of) unwords
as an 'inverse operation'
to (its version of) words
. See the documentation for unwords
for an
explanation of why this is the case.
>>>
words empty
[]>>>
words [ascii| "catboy" |]
["catboy"]>>>
words [ascii| " \r\r\r\rcatboy \n\rgoes\t\t\t\t\tnyan\n " |]
["catboy","goes","nyan"]
Complexity: \(\Theta(n)\)
Since: 1.0.1
unwords :: [AsciiText] -> AsciiText Source #
Identical to the functions of the same name in the text
package
and the
Prelude.
Specifically, links together adjacent texts with a Space character. Equivalent to
.intercalate
[ascii| " " |]
We chose to follow the same semantics for this function as the text package
and the Prelude. This has the consequence that
is
not the same as unwords
.
words
id
, although the documentation for the Prelude
confusingly describes (its version of) unwords
as an 'inverse operation'
to (its version of) words
. See the doctests below for a demonstration of
this.
>>>
unwords []
"">>>
unwords [[ascii| "nyan" |]]
"nyan">>>
unwords . Prelude.replicate 3 $ [ascii| "nyan" |]
"nyan nyan nyan">>>
unwords . words $ [ascii| "nyan\nnyan\nnyan" |]
"nyan nyan nyan"
Complexity: \(\Theta(n)\)
Since: 1.0.1
View patterns
stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText Source #
Return Just
the suffix of the second text if it has the first text as
a prefix, Nothing
otherwise.
>>>
stripPrefix [ascii| "catboy" |] empty
Nothing>>>
stripPrefix empty [ascii| "catboy" |]
Just "catboy">>>
stripPrefix [ascii| "nyan" |] [ascii| "nyan" |]
Just "">>>
stripPrefix [ascii| "nyan" |] [ascii| "catboy" |]
Nothing>>>
stripPrefix [ascii| "catboy" |] [ascii| "catboy goes nyan" |]
Just " goes nyan"
Complexity: \(\Theta(n)\)
Since: 1.0.0
stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText Source #
Return Just
the prefix of the second text if it has the first text as
a suffix, Nothing
otherwise.
>>>
stripSuffix [ascii| "catboy" |] empty
Nothing>>>
stripSuffix empty [ascii| "catboy" |]
Just "catboy">>>
stripSuffix [ascii| "nyan" |] [ascii| "nyan" |]
Just "">>>
stripSuffix [ascii| "nyan" |] [ascii| "catboy" |]
Nothing>>>
stripSuffix [ascii| "nyan" |] [ascii| "catboy goes nyan" |]
Just "catboy goes "
Complexity: \(\Theta(n)\)
Since: 1.0.0
stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText) Source #
stripInfix needle haystack
, given a needle of length \(n\) and a haystack
of length \(h\), attempts to find the first instance of needle
in
haystack
. If successful, it returns Just
the pair consisting of:
- All the text in
haystack
before the first instance ofneedle
; and - All the text in
haystack
after, but not including, the first instance ofneedle
.
If there is no instance of needle
in haystack
, this returns Nothing
.
>>>
stripInfix [ascii| "catboy" |] empty
Nothing>>>
stripInfix empty [ascii| "nyan catboy nyan nyan" |]
Nothing>>>
stripInfix [ascii| "catboy" |] [ascii| "catboy" |]
Just ("","")>>>
stripInfix [ascii| "catboy" |] [ascii| "nyan catboy" |]
Just ("nyan ","")>>>
stripInfix [ascii| "catboy" |] [ascii| "catboy nyan" |]
Just (""," nyan")>>>
stripInfix [ascii| "catboy" |] [ascii| "nyan catboy nyan nyan" |]
Just ("nyan "," nyan nyan")>>>
stripInfix [ascii| "nyan" |] [ascii| "nyanyanyan" |]
Just ("","yanyan")
On complexity
This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:
- All ASCII symbols are equally likely to occur in both the needle and the haystack; and
- The needle has length at least two; and
- Both the needle and the haystack contain at least four unique symbols.
We fall back to split
for singleton needles, and there is no work to be
done on empty needles, which means the second assumption always holds.
Worst-case behaviour becomes more likely the more your input satisfies the following conditions:
- The needle and/or haystack use few unique symbols (less than four is the worst); or
- The haystack contains many instances of the second symbol of the needle which don't lead to full matches.
Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.
See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.
- Description and pseudocode
- "Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.
Since: 1.0.1
commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText) Source #
Find the longest non-empty common prefix of the arguments and return it,
along with the remaining suffixes of both arguments. If the arguments lack a
common, non-empty prefix, returns Nothing
.
>>>
commonPrefixes empty [ascii| "catboy" |]
Nothing>>>
commonPrefixes [ascii| "catboy" |] empty
Nothing>>>
commonPrefixes [ascii| "catboy" |] [ascii| "nyan" |]
Nothing>>>
commonPrefixes [ascii| "catboy" |] [ascii| "catboy" |]
Just ("catboy","","")>>>
commonPrefixes [ascii| "nyan" |] [ascii| "nyan nyan" |]
Just ("nyan",""," nyan")
Complexity: \(\Theta(n)\)
Since: 1.0.1
Searching
filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText Source #
Return the text comprised of all the characters that satisfy the function
argument (that is, for which it returns True
), in the same order as in the
original.
>>>
filter ([char| 'n' |] ==) empty
"">>>
filter ([char| 'n' |] ==) [ascii| "catboy" |]
"">>>
filter ([char| 'n' |] ==) [ascii| "nyan" |]
"nn"
Complexity: \(\Theta(n)\)
Since: 1.0.0
breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)] Source #
breakOnAll needle haystack
, given a needle
of length \(n\) and a
haystack
of length \(h\), finds all non-overlapping instances of needle
in haystack
. Each result consists of the following elements:
- The prefix prior to the match; and
- The match, followed by the rest of the string.
If given an empty needle, the result is a singleton list containing a pair of the entire haystack and the empty text. If given an empty haystack, the result is an empty list.
>>>
breakOnAll empty [ascii| "nyan nyan nyan" |]
[("nyan nyan nyan","")]>>>
breakOnAll [ascii| "nyan" |] empty
[]>>>
breakOnAll [ascii| "nyan" |] [ascii| "nyan" |]
[("","nyan")]>>>
breakOnAll [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
[("","nyan nyan nyan"),("nyan ","nyan nyan"),("nyan nyan ","nyan")]>>>
breakOnAll [ascii| "nyan" |] [ascii| "nyanyanyan" |]
[("","nyanyanyan"),("nyanya","nyan")]
On complexity
This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:
- All ASCII symbols are equally likely to occur in both the needle and the haystack; and
- The needle has length at least two; and
- Both the needle and the haystack contain at least four unique symbols.
We fall back to split
for singleton needles, and there is no work to be
done on empty needles, which means the second assumption always holds.
Worst-case behaviour becomes more likely the more your input satisfies the following conditions:
- The needle and/or haystack use few unique symbols (less than four is the worst); or
- The haystack contains many instances of the second symbol of the needle which don't lead to full matches.
Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.
See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.
- Description and pseudocode
- "Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.
Since: 1.0.1
find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar Source #
Returns Just
the first character in the text satisfying the predicate,
Nothing
otherwise.
>>>
find ([char| 'n' |] ==) empty
Nothing>>>
find ([char| 'n' |] ==) [ascii| "catboy" |]
Nothing>>>
find ([char| 'n' |] ==) [ascii| "nyan" |]
Just '0x6e'>>>
find ([char| 'n' |] /=) [ascii| "nyan" |]
Just '0x79'
Complexity: \(\Theta(n)\)
Since: 1.0.0
partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) Source #
partition p t
is equivalent to (
.filter
p t, filter
(not
p) t)
>>>
partition ([char| 'n' |] ==) empty
("","")>>>
partition ([char| 'n' |] ==) . singleton $ [char| 'n' |]
("n","")>>>
partition ([char| 'n' |] ==) . singleton $ [char| 'w' |]
("","w")>>>
partition ([char| 'n' |] ==) [ascii| "nyan!" |]
("nn","ya!")
Complexity: \(\Theta(n)\)
Since: 1.0.0
Indexing
index :: AsciiText -> Int -> Maybe AsciiChar Source #
Retrieve the ASCII character at the given position in the text. Indexes
begin from 0. If the index provided is invalid (that is, less than 0, equal
to the length of the text, or greater), return Nothing
; otherwise, return
Just
the character at that position.
>>>
index [ascii| "nyan nyan nyan" |] (-100)
Nothing>>>
index [ascii| "nyan nyan nyan" |] 0
Just '0x6e'>>>
index [ascii| "nyan nyan nyan" |] 5
Just '0x6e'>>>
index [ascii| "nyan nyan nyan" |] 2000
Nothing
Complexity: \(\Theta(1)\)
Since: 1.0.1
findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int Source #
Returns Just
the first index in the text such that the character at that
index satisfies the predicate, Nothing
otherwise.
>>>
findIndex ([char| 'n' |] ==) empty
Nothing>>>
findIndex ([char| 'n' |] ==) . singleton $ [char| 'n' |]
Just 0>>>
findIndex ([char| 'n' |] ==) . singleton $ [char| 'w' |]
Nothing>>>
findIndex ([char| 'n' |] ==) [ascii| "nyan" |]
Just 0
Complexity: \(\Theta(n)\)
Since: 1.0.0
count :: AsciiText -> AsciiText -> Int Source #
count needle haystack
, given a needle
of length \(n\) and a haystack of
length \(h\), counts the number of non-overlapping occurrences of needle
in
haystack
. If needle
is empty, the count will be 0.
>>>
count empty [ascii| "nyan nyan nyan" |]
0>>>
count [ascii| "nyan" |] empty
0>>>
count [ascii| "nyan" |] [ascii| "nyan" |]
1>>>
count [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
3>>>
count [ascii| "nyan" |] [ascii| "nyanyanyan" |]
2
On complexity
This function is based on a variant of the NSN algorithm, except it does not detect overlapping needles. Its average-case analysis is based on the assumption that:
- All ASCII symbols are equally likely to occur in both the needle and the haystack; and
- The needle has length at least two; and
- Both the needle and the haystack contain at least four unique symbols.
We fall back to split
for singleton needles, and there is no work to be
done on empty needles, which means the second assumption always holds.
Worst-case behaviour becomes more likely the more your input satisfies the following conditions:
- The needle and/or haystack use few unique symbols (less than four is the worst); or
- The haystack contains many instances of the second symbol of the needle which don't lead to full matches.
Complexity: \(\Theta(h)\) average case, \(\Theta(h \cdot n\)) worst-case.
See also: Note that all the below are references for the original algorithm, which includes searching for overlapping needles. Thus, our implementation will perform better than the analysis suggests.
- Description and pseudocode
- "Algorithms on Strings" by Crochemore, Hancart and Lecroq. PDF available here.
Since: 1.0.1
Zipping
zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)] Source #
'Pair off' characters in both texts at corresponding indices. The result will be limited to the shorter of the two arguments.
>>>
zip empty [ascii| "catboy" |]
[]>>>
zip [ascii| "catboy" |] empty
[]>>>
zip [ascii| "catboy" |] [ascii| "nyan" |]
[('0x63','0x6e'),('0x61','0x79'),('0x74','0x61'),('0x62','0x6e')]
Complexity: \(\Theta(n)\)
Since: 1.0.0
zipWith :: (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText Source #
Combine two texts together in lockstep to produce a new text, using the provided function to combine ASCII characters at each step. The length of the result will be the minimum of the lengths of the two text arguments.
>>>
zipWith max [ascii| "I am a catboy" |] empty
"">>>
zipWith max empty [ascii| "I am a catboy" |]
"">>>
zipWith max [ascii| "I am a catboy" |] [ascii| "Nyan nyan nyan nyan nyan" |]
"Nyan nycntnyy"
Complexity: \(\Theta(n)\)
Since: 1.0.1
Conversions
fromByteString :: ByteString -> Maybe AsciiText Source #
Try and convert a ByteString
into an AsciiText
. Gives Nothing
if the
ByteString
contains any bytes outside the ASCII range (that is, from 0 to
127 inclusive).
>>>
fromByteString "catboy"
Just "catboy">>>
fromByteString . BS.pack $ [128]
Nothing
Complexity: \(\Theta(n)\)
Since: 1.0.0
toByteString :: AsciiText -> ByteString Source #
Reinterpret an AsciiText
as a ByteString
(without copying).
>>>
toByteString empty
"">>>
toByteString . singleton $ [char| 'w' |]
"w">>>
toByteString [ascii| "nyan" |]
"nyan"
Complexity: \(\Theta(1)\)
Since: 1.0.0
Optics
byteStringWise :: Prism' ByteString AsciiText Source #
A convenient demonstration of the relationship between toByteString
and
fromByteString
.
>>>
preview byteStringWise "catboy goes nyan"
Just "catboy goes nyan">>>
preview byteStringWise . BS.pack $ [0xff, 0xff]
Nothing>>>
review byteStringWise [ascii| "I love catboys" |]
"I love catboys"
Since: 1.0.0
packedChars :: Iso' [AsciiChar] AsciiText Source #
Pack (or unpack) a list of ASCII characters into a text.
>>>
view packedChars [[char| 'n' |], [char| 'y' |], [char| 'a' |], [char| 'n' |]]
"nyan">>>
review packedChars [ascii| "nyan" |]
['0x6e','0x79','0x61','0x6e']
Since: 1.0.1
chars :: IxTraversal' Int64 AsciiText AsciiChar Source #
Traverse the individual ASCII characters in a text.
>>>
preview (elementOf chars 0) [ascii| "I am a catboy" |]
Just '0x49'>>>
preview (elementOf chars 100) [ascii| "I am a catboy" |]
Nothing>>>
iover chars (\i x -> bool x [char| 'w' |] . even $ i) [ascii| "I am a catboy" |]
"w wmwawcwtwow"
Since: 1.0.1
packedBytes :: Prism' [Word8] AsciiText Source #
Pack (or unpack) a list of bytes into a text. This isn't as capable as
packedChars
, as that would allow construction of invalid texts.
>>>
preview packedBytes [0x6e, 0x79, 0x61, 0x6e]
Just "nyan">>>
preview packedBytes [0xff, 0xfe]
Nothing>>>
review packedBytes [ascii| "nyan" |]
[110,121,97,110]
Since: 1.0.1
bytes :: IxFold Int64 AsciiText Word8 Source #
Access the individual bytes in a text. This isn't as capable as chars
, as
that would allow modifications of the bytes in ways that aren't valid as
ASCII.
>>>
itoListOf bytes [ascii| "I am a catboy" |]
[(0,73),(1,32),(2,97),(3,109),(4,32),(5,97),(6,32),(7,99),(8,97),(9,116),(10,98),(11,111),(12,121)]
Since: 1.0.1