{-# LANGUAGE Safe #-}

-- |
-- Module      :  Text.Megaparsec.Unicode
-- Copyright   :  © 2024–present Megaparsec contributors
-- License     :  FreeBSD
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- Utility functions for working with Unicode.
--
-- @since 9.7.0
module Text.Megaparsec.Unicode
  ( stringLength,
    charLength,
    isWideChar,
  )
where

import Data.Array (Array, bounds, listArray, (!))
import Data.Char (ord)

-- | Calculate length of a string taking into account the fact that certain
-- 'Char's may span more than 1 column.
--
-- The result is the sum of 'charLength' over every character of the
-- container, so each wide character (see 'isWideChar') contributes 2 and
-- every other character contributes 1.
--
-- @since 9.7.0
stringLength :: (Traversable t) => t Char -> Int
stringLength = sum . fmap charLength

-- | Return length of an individual 'Char', measured in columns: 2 when
-- 'isWideChar' holds for the character and 1 otherwise.
--
-- @since 9.7.0
charLength :: Char -> Int
charLength ch
  | isWideChar ch = 2
  | otherwise = 1

-- | Determine whether the given 'Char' is “wide”, that is, whether it spans
-- 2 columns instead of one.
--
-- The code point of the character is looked up by binary search in
-- 'wideCharRanges'; the search relies on that table being sorted in
-- ascending order.
--
-- @since 9.7.0
isWideChar :: Char -> Bool
isWideChar c = go (bounds wideCharRanges)
  where
    -- Search the inclusive index window @(lo, hi)@ of 'wideCharRanges'.
    go :: (Int, Int) -> Bool
    go (lo, hi)
      -- Empty window: no range contains the code point.
      | hi < lo = False
      -- The middle range contains the code point (both ends inclusive).
      | a <= n && n <= b = True
      -- The code point lies below the middle range: continue to the left.
      | n < a = go (lo, pred mid)
      -- Otherwise it lies above the middle range: continue to the right.
      | otherwise = go (succ mid, hi)
      where
        mid = (lo + hi) `div` 2
        (a, b) = wideCharRanges ! mid
    -- Code point of the character being tested.
    n :: Int
    n = ord c

-- | Wide character ranges.
--
-- Every element is a pair @(lo, hi)@ of code points; 'isWideChar' treats
-- both ends as inclusive. The entries are listed in ascending order and do
-- not overlap, which the binary search in 'isWideChar' depends on.
--
-- The array is indexed from 0 and its upper bound is derived from the
-- length of the list, so entries can be added or removed without having to
-- keep a separate hard-coded bound in sync.
--
-- NOTE(review): the NOINLINE pragma is presumably there so that the table
-- is built once and shared between use sites — confirm.
wideCharRanges :: Array Int (Int, Int)
wideCharRanges = listArray (0, length ranges - 1) ranges
  where
    ranges :: [(Int, Int)]
    ranges =
      [ (0x001100, 0x00115f),
        (0x00231a, 0x00231b),
        (0x002329, 0x00232a),
        (0x0023e9, 0x0023ec),
        (0x0023f0, 0x0023f0),
        (0x0023f3, 0x0023f3),
        (0x0025fd, 0x0025fe),
        (0x002614, 0x002615),
        (0x002648, 0x002653),
        (0x00267f, 0x00267f),
        (0x002693, 0x002693),
        (0x0026a1, 0x0026a1),
        (0x0026aa, 0x0026ab),
        (0x0026bd, 0x0026be),
        (0x0026c4, 0x0026c5),
        (0x0026ce, 0x0026ce),
        (0x0026d4, 0x0026d4),
        (0x0026ea, 0x0026ea),
        (0x0026f2, 0x0026f3),
        (0x0026f5, 0x0026f5),
        (0x0026fa, 0x0026fa),
        (0x0026fd, 0x0026fd),
        (0x002705, 0x002705),
        (0x00270a, 0x00270b),
        (0x002728, 0x002728),
        (0x00274c, 0x00274c),
        (0x00274e, 0x00274e),
        (0x002753, 0x002755),
        (0x002757, 0x002757),
        (0x002795, 0x002797),
        (0x0027b0, 0x0027b0),
        (0x0027bf, 0x0027bf),
        (0x002b1b, 0x002b1c),
        (0x002b50, 0x002b50),
        (0x002b55, 0x002b55),
        (0x002e80, 0x002e99),
        (0x002e9b, 0x002ef3),
        (0x002f00, 0x002fd5),
        (0x002ff0, 0x002ffb),
        (0x003000, 0x00303e),
        (0x003041, 0x003096),
        (0x003099, 0x0030ff),
        (0x003105, 0x00312f),
        (0x003131, 0x00318e),
        (0x003190, 0x0031ba),
        (0x0031c0, 0x0031e3),
        (0x0031f0, 0x00321e),
        (0x003220, 0x003247),
        (0x003250, 0x004db5),
        (0x004e00, 0x009fef),
        (0x00a000, 0x00a48c),
        (0x00a490, 0x00a4c6),
        (0x00a960, 0x00a97c),
        (0x00ac00, 0x00d7a3),
        (0x00f900, 0x00fa6d),
        (0x00fa70, 0x00fad9),
        (0x00fe10, 0x00fe19),
        (0x00fe30, 0x00fe52),
        (0x00fe54, 0x00fe66),
        (0x00fe68, 0x00fe6b),
        (0x00ff01, 0x00ff60),
        (0x00ffe0, 0x00ffe6),
        (0x016fe0, 0x016fe3),
        (0x017000, 0x0187f7),
        (0x018800, 0x018af2),
        (0x01b000, 0x01b11e),
        (0x01b150, 0x01b152),
        (0x01b164, 0x01b167),
        (0x01b170, 0x01b2fb),
        (0x01f004, 0x01f004),
        (0x01f0cf, 0x01f0cf),
        (0x01f18e, 0x01f18e),
        (0x01f191, 0x01f19a),
        (0x01f200, 0x01f202),
        (0x01f210, 0x01f23b),
        (0x01f240, 0x01f248),
        (0x01f250, 0x01f251),
        (0x01f260, 0x01f265),
        (0x01f300, 0x01f320),
        (0x01f32d, 0x01f335),
        (0x01f337, 0x01f37c),
        (0x01f37e, 0x01f393),
        (0x01f3a0, 0x01f3ca),
        (0x01f3cf, 0x01f3d3),
        (0x01f3e0, 0x01f3f0),
        (0x01f3f4, 0x01f3f4),
        (0x01f3f8, 0x01f43e),
        (0x01f440, 0x01f440),
        (0x01f442, 0x01f4fc),
        (0x01f4ff, 0x01f53d),
        (0x01f54b, 0x01f54e),
        (0x01f550, 0x01f567),
        (0x01f57a, 0x01f57a),
        (0x01f595, 0x01f596),
        (0x01f5a4, 0x01f5a4),
        (0x01f5fb, 0x01f64f),
        (0x01f680, 0x01f6c5),
        (0x01f6cc, 0x01f6cc),
        (0x01f6d0, 0x01f6d2),
        (0x01f6d5, 0x01f6d5),
        (0x01f6eb, 0x01f6ec),
        (0x01f6f4, 0x01f6fa),
        (0x01f7e0, 0x01f7eb),
        (0x01f90d, 0x01f971),
        (0x01f973, 0x01f976),
        (0x01f97a, 0x01f9a2),
        (0x01f9a5, 0x01f9aa),
        (0x01f9ae, 0x01f9ca),
        (0x01f9cd, 0x01f9ff),
        (0x01fa70, 0x01fa73),
        (0x01fa78, 0x01fa7a),
        (0x01fa80, 0x01fa82),
        (0x01fa90, 0x01fa95),
        (0x020000, 0x02a6d6),
        (0x02a700, 0x02b734),
        (0x02b740, 0x02b81d),
        (0x02b820, 0x02cea1),
        (0x02ceb0, 0x02ebe0),
        (0x02f800, 0x02fa1d)
      ]
{-# NOINLINE wideCharRanges #-}