---------------------------------------------------------------------- A LIBRARY OF MONADIC PARSER COMBINATORS Graham Hutton and Erik Meijer, December 1995 This literate Gofer script defines a library of parser combinators, and is taken from our article "Monadic Parser Combinators". For reasons of efficiency or specific details of Gofer, some combinators are defined slightly differently from the article. Their functionality, however, remains the same as those given in the article. NOTE: This library requires Gofer version 2.30b or greater. You will also need to set the standard prelude to be the constructor classes prelude "cc.prelude"; see the Gofer documentation for further details. --- Operator precedences --------------------------------------------- > infixr 5 +++ --- Class definitions ------------------------------------------------ class Functor f where map :: (a -> b) -> (f a -> f b) class Functor m => Monad m where result :: a -> m a bind :: m a -> (a -> m b) -> m b join :: m (m a) -> m a x `bind` f = join (map f x) join x = bind x id > class Monad m => StateMonad m s where > update :: (s -> s) -> m s > set :: s -> m s > fetch :: m s > > set s = update (\_ -> s) > fetch = update id > > class Monad m => ReaderMonad m s where > env :: m s > setenv :: s -> m a -> m a --- The exception monad ---------------------------------------------- > data Maybe a = Just a | Nothing > > instance Monad Maybe where > -- result :: a -> Maybe a > result x = Just x > > -- bind :: Maybe a -> (a -> Maybe b) -> Maybe b > (Just x) `bind` f = f x > Nothing `bind` f = Nothing > > instance Monad0 Maybe where > -- zero :: Maybe a > zero = Nothing > > instance MonadPlus Maybe where > -- (++) :: Maybe a -> Maybe a -> Maybe a > Just x ++ y = Just x > Nothing ++ y = y --- The non-determinism monad ---------------------------------------- instance Functor [] where -- map :: (a -> b) -> ([a] -> [b]) map f [] = [] map f (x:xs) = (f x) : map f xs instance Monad [] where -- result :: a -> [a] result x = [x] -- bind :: [a] -> (a -> [b]) -> [b] [] `bind` f = [] (x:xs) `bind` f = f x ++ (xs `bind` f) instance Monad0 [] where -- zero :: [a] zero = [] instance MonadPlus [] where -- (++) :: [a] -> [a] -> [a] [] ++ ys = ys (x:xs) ++ ys = x : (xs ++ ys) --- The state-transformer monad -------------------------------------- > type State s a = s -> (a,s) > in mapST, resultST, bindST, updateST > > mapST :: (a -> b) -> (State s a -> State s b) > mapST f st = \s -> let (v,s') = st s in (f v, s') > > resultST :: a -> State s a > resultST v = \s -> (v,s) > > bindST :: State s a -> (a -> State s b) -> State s b > st `bindST` f = \s -> let (v,s') = st s in f v s' > > updateST :: (s -> s) -> State s s > updateST f = \s -> (s, f s) > > instance Functor (State s) where > map = mapST > > instance Monad (State s) where > result = resultST > bind = bindST > > instance StateMonad (State s) s where > update = updateST --- The parameterised state-transformer monad ------------------------ > type StateM m s a = s -> m (a,s) > in mapSTM, resultSTM, bindSTM, zeroSTM, > plusSTM, updateSTM, force, first, parse > > mapSTM :: Functor m => (a -> b) -> (StateM m s a -> StateM m s b) > mapSTM f stm = \s -> map (\(v,s') -> (f v, s')) (stm s) > > resultSTM :: Monad m => a -> StateM m s a > resultSTM v = \s -> result (v,s) > > bindSTM :: Monad m => StateM m s a -> (a -> StateM m s b) -> StateM m s b > stm `bindSTM` f = \s -> stm s `bind` \(v,s') -> f v s' > > zeroSTM :: Monad0 m => StateM m s a > zeroSTM = \s -> zero > > plusSTM :: MonadPlus m => StateM m s a -> StateM m s a -> StateM m s a > stm `plusSTM` stm' = \s -> stm s ++ stm' s > > updateSTM :: Monad m => (s -> s) -> StateM m s s > updateSTM f = \s -> result (s, f s) > > instance Functor m => Functor (StateM m s) where > map = mapSTM > > instance Monad m => Monad (StateM m s) where > result = resultSTM > bind = bindSTM > > instance Monad0 m => Monad0 (StateM m s) where > zero = zeroSTM > > instance MonadPlus m => MonadPlus (StateM m s) where > (++) = plusSTM > > instance Monad m => StateMonad (StateM m s) s where > update = updateSTM --- The parameterised state-reader monad ----------------------------- > type ReaderM m s a = s -> m a > in mapSRM, resultSRM, bindSRM, zeroSRM, plusSRM, > envSRM, setenvSRM, updateSTRM, force, first, parse > > mapSRM :: Functor m => (a -> b) -> (ReaderM m s a -> ReaderM m s b) > mapSRM f srm = \s -> map f (srm s) > > resultSRM :: Monad m => a -> ReaderM m s a > resultSRM v = \s -> result v > > bindSRM :: Monad m => ReaderM m s a -> (a -> ReaderM m s b) -> ReaderM m s b > srm `bindSRM` f = \s -> srm s `bind` \v -> f v s > > zeroSRM :: Monad0 m => ReaderM m s a > zeroSRM = \s -> zero > > plusSRM :: MonadPlus m => ReaderM m s a -> ReaderM m s a -> ReaderM m s a > srm `plusSRM` srm' = \s -> srm s ++ srm' s > > envSRM :: Monad m => ReaderM m s s > envSRM = \s -> result s > > setenvSRM :: Monad m => s -> ReaderM m s a -> ReaderM m s a > setenvSRM s srm = \_ -> srm s > > updateSTRM :: StateMonad m a => (a -> a) -> ReaderM m s a > updateSTRM f = \_ -> update f > > instance Functor m => Functor (ReaderM m s) where > map = mapSRM > > instance Monad m => Monad (ReaderM m s) where > result = resultSRM > bind = bindSRM > > instance Monad0 m => Monad0 (ReaderM m s) where > zero = zeroSRM > > instance MonadPlus m => MonadPlus (ReaderM m s) where > (++) = plusSRM > > instance Monad m => ReaderMonad (ReaderM m s) s where > env = envSRM > setenv = setenvSRM > > instance StateMonad m s => StateMonad (ReaderM m s') s where > update = updateSTRM --- Primitive parser combinators ------------------------------------- > type Pos = (Int,Int) > > type Pstring = (Pos,String) > > type Parser a = ReaderM (StateM [] Pstring) Pos a map :: (a -> b) -> (Parser a -> Parser b) result :: a -> Parser a bind :: Parser a -> (a -> Parser b) -> Parser b zero :: Parser a (++) :: Parser a -> Parser a -> Parser a update :: (Pstring -> Pstring) -> Parser Pstring set :: Pstring -> Parser Pstring fetch :: Parser Pstring env :: Parser Pos setenv :: Pos -> Parser a -> Parser a > item :: Parser Char > item = [x | (pos,x:_) <- update newstate > , defpos <- env > , onside pos defpos] > > onside :: Pos -> Pos -> Bool > onside (l,c) (dl,dc) = (c > dc) || (l == dl) > > newstate :: Pstring -> Pstring > newstate ((l,c),x:xs) > = (newpos,xs) > where > newpos = case x of > '\n' -> (l+1,0) > '\t' -> (l,((c `div` 8)+1)*8) > _ -> (l,c+1) > > force :: Parser a -> Parser a > force p = \pos inp -> let x = p pos inp in > (fst (head x), snd (head x)) : tail x > > first :: Parser a -> Parser a > first p = \pos inp -> case p pos inp of > [] -> [] > (x:xs) -> [x] > > parse :: Parser a -> String -> [(a,Pstring)] > parse p inp = strip p (0,-1) ((0,0),inp) --- Derived combinators ---------------------------------------------- > (+++) :: Parser a -> Parser a -> Parser a > p +++ q = first (p ++ q) > > sat :: (Char -> Bool) -> Parser Char > sat p = [x | x <- item, p x] > > many :: Parser a -> Parser [a] > many p = force (many1 p +++ [[]]) > > many1 :: Parser a -> Parser [a] > many1 p = [x:xs | x <- p, xs <- many p] > > sepby :: Parser a -> Parser b -> Parser [a] > p `sepby` sep = (p `sepby1` sep) +++ [[]] > > sepby1 :: Parser a -> Parser b -> Parser [a] > p `sepby1` sep = [x:xs | x <- p > , xs <- many [y | _ <- sep, y <- p]] > > chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a > chainl p op v = (p `chainl1` op) +++ [v] > > chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a > p `chainl1` op = p `bind` rest > where > rest x = (op `bind` \f -> > p `bind` \y -> > rest (f x y)) +++ [x] > > chainr :: Parser a -> Parser (a -> a -> a) -> a -> Parser a > chainr p op v = (p `chainr1` op) +++ [v] > > chainr1 :: Parser a -> Parser (a -> a -> a) -> Parser a > p `chainr1` op = p `bind` \x -> > [f x y | f <- op, y <- p `chainr1` op] +++ [x] > > ops :: [(Parser a, b)] -> Parser b > ops xs = foldr1 (+++) [[op | _ <- p] | (p,op) <- xs] > > bracket :: Parser a -> Parser b -> Parser c -> Parser b > bracket open p close = [x | _ <- open, x <- p, _ <- close] --- Useful parsers --------------------------------------------------- > char :: Char -> Parser Char > char x = sat (\y -> x == y) > > digit :: Parser Char > digit = sat isDigit > > lower :: Parser Char > lower = sat isLower > > upper :: Parser Char > upper = sat isUpper > > letter :: Parser Char > letter = sat isAlpha > > alphanum :: Parser Char > alphanum = sat isAlphanum > > string :: String -> Parser String > string "" = [""] > string (x:xs) = [x:xs | _ <- char x, _ <- string xs] > > ident :: Parser String > ident = [x:xs | x <- lower, xs <- many alphanum] > > nat :: Parser Int > nat = [ord x - ord '0' | x <- digit] `chainl1` [op] > where > m `op` n = 10*m + n > > int :: Parser Int > int = [-n | _ <- char '-', n <- nat] +++ nat --- Lexical combinators ---------------------------------------------- > spaces :: Parser () > spaces = [() | _ <- many1 (sat isSpace)] > > comment :: Parser () > comment = [() | _ <- string "--" > , _ <- many (sat (\x -> x /= '\n'))] > > junk :: Parser () > junk = [() | _ <- setenv (0,-1) (many (spaces ++ comment))] > > strip :: Parser a -> Parser a > strip p = [v | _ <- junk, v <- p] > > token :: Parser a -> Parser a > token p = [v | v <- p, _ <- junk] > > many1_offside :: Parser a -> Parser [a] > many1_offside p = [vs | (pos,_) <- fetch :: Parser Pstring > , vs <- setenv pos (many1 (off p))] > > off :: Parser a -> Parser a > off p = [v | (dl,dc) <- env :: Parser Pos > , ((l,c),_) <- fetch :: Parser Pstring > , c == dc > , v <- setenv (l,dc) p] > > many_offside :: Parser a -> Parser [a] > many_offside p = many1_offside p +++ [[]] --- Token parsers ---------------------------------------------------- > natural :: Parser Int > natural = token nat > > integer :: Parser Int > integer = token int > > symbol :: String -> Parser String > symbol xs = token (string xs) > > identifier :: [String] -> Parser String > identifier ks = token [x | x <- ident, not (elem x ks)] --- Error reporting combinators -------------------------------------- > mustbe :: String -> Parser String > mustbe xs = symbol xs +++ err ("Expected \"" ++ xs ++ "\"") > > err :: String -> Parser a > err xs = (fetch :: Parser Pstring) `bind` \((l,c),_) -> > error ("PARSE ERROR (line " ++ show l ++ > ", column " ++ show c ++ ") -- " ++ xs) ----------------------------------------------------------------------