{-# LANGUAGE DeriveDataTypeable #-}
{- | This module implements KOI8-R encoding which covers the russian and bulgarian alphabet.
     See <http://en.wikipedia.org/wiki/KOI8-R> for more information.
 -}
module Data.Encoding.KOI8R
	(KOI8R(..)) where

import Control.Throws
import Data.Array.Unboxed
import Data.Char (ord,chr)
import Data.Map hiding (map,(!))
import Data.Word
import Prelude hiding (lookup)
import Data.Typeable

import Data.Encoding.Base
import Data.Encoding.ByteSource
import Data.Encoding.ByteSink
import Data.Encoding.Exception

data KOI8R = KOI8R deriving (Eq,Show,Typeable)

koi8rArr :: UArray Word8 Char
koi8rArr = listArray (128,255) koi8rList

koi8rMap :: Map Char Word8
koi8rMap = fromList (zip koi8rList [128..])

koi8rList :: [Char]
koi8rList =
	['\x2500','\x2502','\x250c','\x2510','\x2514','\x2518','\x251c','\x2524'
	,'\x252c','\x2534','\x253c','\x2580','\x2584','\x2588','\x258c','\x2590'
	,'\x2591','\x2592','\x2593','\x2320','\x25a0','\x2219','\x221a','\x2248'
	,'\x2264','\x2265','\x00a0','\x2321','\x00b0','\x00b2','\x00b7','\x00f7'
	,'\x2550','\x2551','\x2552','\x0451','\x2553','\x2554','\x2555','\x2556'
	,'\x2557','\x2558','\x2559','\x255a','\x255b','\x255c','\x255d','\x255e'
	,'\x255f','\x2560','\x2561','\x0401','\x2562','\x2563','\x2564','\x2565'
	,'\x2566','\x2567','\x2568','\x2569','\x256a','\x256b','\x256c','\x00a9'
	,'\x044e','\x0430','\x0431','\x0446','\x0434','\x0435','\x0444','\x0433'
	,'\x0445','\x0438','\x0439','\x043a','\x043b','\x043c','\x043d','\x043e'
	,'\x043f','\x044f','\x0440','\x0441','\x0442','\x0443','\x0436','\x0432'
	,'\x044c','\x044b','\x0437','\x0448','\x044d','\x0449','\x0447','\x044a'
	,'\x042e','\x0410','\x0411','\x0426','\x0414','\x0415','\x0424','\x0413'
	,'\x0425','\x0418','\x0419','\x041a','\x041b','\x041c','\x041d','\x041e'
	,'\x041f','\x042f','\x0420','\x0421','\x0422','\x0423','\x0416','\x0412'
	,'\x042c','\x042b','\x0417','\x0428','\x042d','\x0429','\x0427','\x042a'
	]

instance Encoding KOI8R where
    decodeChar _ = do
      w <- fetchWord8
      if w < 128
        then return $ chr $ fromIntegral w
        else return $ koi8rArr!w
    encodeChar _ ch
	| ch < '\128' = pushWord8 $ fromIntegral $ ord ch
	| otherwise   = case lookup ch koi8rMap of
		Just w -> pushWord8 w
		Nothing -> throwException (HasNoRepresentation ch)
    encodeable _ c = member c koi8rMap