module Evaluation
(Evaluation.main)
where
import Control.Monad
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as BSL
import Data.Char
import Data.Csv
import Data.Either
import Data.List
import qualified Data.Map as M
import Data.Ord
import qualified Data.PSQueue as PS
import qualified Data.Text as T
import Data.Text.Encoding
import qualified Data.Text.IO as TI
import qualified Data.Vector as V
import Hag
import Helpers
import NLP.Tokenize
import Preprocess
import Preprocess
import qualified System.Directory as S
import System.Environment
import System.IO (hPutStrLn, stderr)
import Tweets
-- | Build one combined feature map for a collection of tweets.
--
-- Each tweet is turned into a 'FeatureMap' with 'extractFeatures'; the
-- per-tweet maps are then merged, adding together the values of any key
-- that appears in more than one tweet. An empty vector yields an empty map.
createDictionary :: V.Vector Tweet -> FeatureMap
createDictionary = V.foldl' mergeTweet M.empty
  where
    -- Strict left fold: the accumulated map is forced at every step, so we
    -- do not pile up one unevaluated 'M.unionWith' thunk per tweet.
    mergeTweet acc tweet = M.unionWith (+) acc (extractFeatures tweet)
-- | Merge every 'FeatureMap' stored as a value in the given map into one.
--
-- The 'Tweet' keys are ignored; only the values are combined. Keys shared
-- between several feature maps have their values added together. An empty
-- input map yields an empty feature map.
createDictionaryFromMap :: M.Map Tweet FeatureMap -> FeatureMap
createDictionaryFromMap = M.unionsWith (+) . M.elems
-- | Entry point: build word-count tables from a directory of tweet CSVs.
--
-- The first command-line argument is the directory to scan. Every file
-- returned by 'getFiles' is read (in sorted path order), run through
-- 'preprocess' and parsed with 'parseCsv'. Three CSV files are then written
-- to the current working directory:
--
-- * @agg_words.csv@     – features of tweets labelled @"aggressive"@
-- * @non_agg_words.csv@ – features of tweets labelled @"non_aggressive"@
-- * @words.csv@         – features of all parsed tweets
--
-- Each file starts with a @word,frequency@ header row, followed by one row
-- per map entry ordered by ascending value. Finally the number of
-- aggressive tweets is printed to stdout.
--
-- Raises an 'IOError' (via 'userError') when no directory argument is given.
-- Files that fail to parse are skipped, with a warning on stderr.
main :: IO ()
main = do
  args <- getArgs
  dir <- case args of
    (d:_) -> return d
    [] -> do
      prog <- getProgName
      ioError (userError ("usage: " ++ prog ++ " DIRECTORY"))
  files <- getFiles dir
  let sortedFiles = sort files
  csvs <- mapM TI.readFile sortedFiles
  let parsed = map (parseCsv . preprocess) csvs
      -- Names of the input files whose contents could not be parsed.
      failedFiles = [path | (path, Left _) <- zip sortedFiles parsed]
      listOfVectorOfTweets = rights parsed :: [V.Vector Tweet]
      vTweets = V.concat listOfVectorOfTweets :: V.Vector Tweet
      aggTweets = filterByLabel vTweets "aggressive" :: V.Vector Tweet
      nonAggTweets = filterByLabel vTweets "non_aggressive" :: V.Vector Tweet
      allWords = encodeByCount (createDictionary vTweets)
      aggWords = encodeByCount (createDictionary aggTweets)
      nonAggWords = encodeByCount (createDictionary nonAggTweets)
      -- The header must be ONE record with two fields. Encoding a list of
      -- Strings would treat every String as a record of Char fields and
      -- emit the rows "w,o,r,d" and "f,r,e,q,u,e,n,c,y" instead.
      header = encode [("word", "frequency") :: (String, String)]
  -- Do not drop unparsable inputs silently; keep going with the rest.
  mapM_ (hPutStrLn stderr . ("warning: could not parse " ++)) failedFiles
  L.writeFile "agg_words.csv" $ header `L.append` aggWords
  L.writeFile "non_agg_words.csv" $ header `L.append` nonAggWords
  L.writeFile "words.csv" $ header `L.append` allWords
  print $ V.length aggTweets
  where
    -- Serialise a feature map as CSV rows, ordered by ascending value.
    encodeByCount :: FeatureMap -> L.ByteString
    encodeByCount = encode . sortBy (comparing snd) . M.toList