-- *************************************************************
-- *
-- * Trivial eXpression Language (TXL) Lexer
-- *
-- * For G52MAL Exercises Set 5, problem 3, Spring 2011
-- *
-- *************************************************************

module Lexer where

import Data.Char

----------------------------------------------------------------
-- Token type
----------------------------------------------------------------

-- Identifier names are kept as plain strings.
type Id = String

-- The tokens produced by the lexer. Each simple token stands for a single
-- character of the input; T_Let and T_In stand for the keywords "let" and
-- "in".
data Token
    = T_Int Int      -- literal integer, carrying its value
    | T_Id Id        -- identifier, carrying its name
    | T_Plus         -- '+'
    | T_Minus        -- '-'
    | T_Times        -- '*'
    | T_Divide       -- '/'
    | T_LeftPar      -- '('
    | T_RightPar     -- ')'
    | T_Equal        -- '='
    | T_Let          -- keyword "let"
    | T_In           -- keyword "in"
    deriving (Eq, Show)

----------------------------------------------------------------
-- Lexer
----------------------------------------------------------------

-- Note that using "error" to reject an input is rather crude.
-- A better approach would be to return a Maybe type instead:
-- lexer :: [Char] -> Maybe [Token]

-- Convert TXL source text into a list of tokens.
--
-- White space is skipped. Every other character must begin an operator,
-- a parenthesis, '=', a literal integer, an identifier, or a keyword;
-- otherwise the input is rejected by calling "error" with the message
-- "Unrecognised Character: " followed by the offending character.

lexer :: [Char] -> [Token]

-- End of input

lexer [] = []

-- Lex simple tokens

lexer ('+' : cs) = T_Plus     : lexer cs
lexer ('-' : cs) = T_Minus    : lexer cs
lexer ('*' : cs) = T_Times    : lexer cs
lexer ('/' : cs) = T_Divide   : lexer cs
lexer ('(' : cs) = T_LeftPar  : lexer cs
lexer (')' : cs) = T_RightPar : lexer cs
lexer ('=' : cs) = T_Equal    : lexer cs

-- Drop white space; lex literal integers, identifiers, and keywords

lexer (c : cs)
    -- isSpace covers tabs and carriage returns as well as spaces and
    -- new lines, so tab-indented input and CRLF line endings are accepted.
    | isSpace c = lexer cs

    -- "read" cannot fail here: its argument is a non-empty run of digits.
    | isDigit c = T_Int (read (c : digits)) : lexer afterDigits

    -- An identifier or keyword starts with a letter and continues with
    -- letters and digits.
    | isAlpha c = mkIdOrKwd (c : alnums) : lexer afterAlnums

    | otherwise = error ("Unrecognised Character: " ++ [c])
  where
    -- Each span splits the remaining input in a single pass; only the
    -- pair belonging to the guard that fires is ever evaluated.
    (digits, afterDigits) = span isDigit cs
    (alnums, afterAlnums) = span isAlphaNum cs

    -- Keywords take priority over identifiers with the same spelling.
    mkIdOrKwd       :: String -> Token
    mkIdOrKwd "let" =  T_Let
    mkIdOrKwd "in"  =  T_In
    mkIdOrKwd name  =  T_Id name

----------------------------------------------------------------
