import Data.List (partition,nub) -- v is the type of non-terminals -- For terminals we use characters (but $ is reserved for end of word) data Symbol v = Terminal Char | NonTerminal v deriving Show data Production v = Production v [Symbol v] deriving Show prodNT :: Production v -> v prodNT (Production v _) = v prodSF :: Production v -> [Symbol v] prodSF (Production _ sf) = sf data CFG v = CFG v [Production v] start :: CFG v -> v start (CFG v _) = v productions :: CFG v -> [Production v] productions (CFG _ ps) = ps -- Production for a fixed non-terminal productionsNT :: Eq v => CFG v -> v -> [Production v] productionsNT cfg v = filter (\p -> prodNT p == v) (productions cfg) nonterminals :: Eq v => CFG v -> [v] nonterminals cfg = nub (map prodNT (productions cfg)) -- All the sentential forms derivable from a non-terminal derivates :: Eq v => CFG v -> v -> [[Symbol v]] derivates cfg v = map prodSF $ filter (\p -> prodNT p == v) (productions cfg) {- Example : S -> AB | C A -> aA | epsilon B -> Bb | c C -> Cc | b -} data NT1 = Snt | Ant | Bnt | Cnt deriving (Eq,Show) cfg1 :: CFG NT1 cfg1 = CFG Snt [Production Snt [NonTerminal Ant, NonTerminal Bnt], Production Snt [NonTerminal Cnt], Production Ant [Terminal 'a', NonTerminal Ant], Production Ant [], Production Bnt [NonTerminal Bnt, Terminal 'b'], Production Bnt [Terminal 'c'], Production Cnt [NonTerminal Cnt, Terminal 'c'], Production Cnt [Terminal 'b']] -- Inefficient generation of all words (leftmost derivation) -- Applying all possible productions to the leftmost symbol leftProd :: Eq v => CFG v -> [Symbol v] -> [[Symbol v]] leftProd cfg [] = [[]] leftProd cfg (Terminal a : w) = map (Terminal a:) (leftProd cfg w) leftProd cfg (NonTerminal v : w) = map (++w) (derivates cfg v) getWord :: [Symbol v] -> Maybe String getWord [] = Just "" getWord (Terminal a : sf) = case (getWord sf) of Nothing -> Nothing Just w -> Just (a:w) getWord _ = Nothing partSF :: [[Symbol v]] -> ([String],[[Symbol v]]) partSF [] = ([],[]) partSF (sf:sfs) = let (ss,rs) = partSF sfs in case (getWord sf) of Nothing -> (ss,sf:rs) Just s -> (s:ss,rs) allWordsSF :: Eq v => CFG v -> [[Symbol v]] -> [String] allWordsSF cfg sf = let (ss,rs) = partSF sf in ss ++ allWordsSF cfg (concat $ map (leftProd cfg) rs) allWords :: Eq v => CFG v -> [String] allWords cfg = allWordsSF cfg [[NonTerminal (start cfg)]] -- Construction of the set of nullable non-terminals -- One-step: nullable with respect to a set of already known nullables nullRel :: Eq v => CFG v -> [v] -> v -> Bool nullRel cfg nvs v = or (map nullSF (derivates cfg v)) where nullSF [] = True nullSF (Terminal _:_) = False nullSF (NonTerminal v':sf) = (v' `elem` nvs) && (nullSF sf) -- nvs = already known nullables, vs = non-terminals to check nulls :: Eq v => CFG v -> [v] -> [v] -> [v] nulls cfg nvs vs = let (nvs',vs') = partition (nullRel cfg nvs) vs in case nvs' of [] -> nvs nvs' -> nulls cfg (nvs'++nvs) vs' nullables :: Eq v => CFG v -> [v] nullables cfg = nulls cfg [] (nonterminals cfg) nullable :: Eq v => CFG v -> v -> Bool nullable cfg v = v `elem` (nullables cfg) nullableSF :: Eq v => CFG v -> [Symbol v] -> Bool nullableSF cfg [] = True nullableSF cfg (Terminal _:_) = False nullableSF cfg (NonTerminal v:sf) = (nullable cfg v) && (nullableSF cfg sf) -- All the possible first characters of a word generated by a non-terminal firstSF :: Eq v => CFG v -> (v -> [Char]) -> [Symbol v] -> [Char] firstSF cfg f [] = [] firstSF cfg f (Terminal a : _) = [a] firstSF cfg f (NonTerminal v : sf) = if (nullable cfg v) then (f v)++(firstSF cfg f sf) else (f v) firstRel :: Eq v => CFG v -> (v -> [Char]) -> v -> [Char] firstRel cfg f = let f' = \v -> nub $ concat (map (firstSF cfg f) (derivates cfg v)) in if and [f v == f' v | v <- nonterminals cfg] then f' else firstRel cfg f' firstSym :: Eq v => CFG v -> v -> [Char] firstSym cfg = firstRel cfg (\_->[]) firstSymSF :: Eq v => CFG v -> [Symbol v] -> [Char] firstSymSF cfg sf = firstSF cfg (firstSym cfg) sf -- All the characters that can come after a non-terminal -- The sentential forms that follow v in the productions follSF :: Eq v => CFG v -> v -> [[Symbol v]] follSF cfg v = foldr follNTinSF [] $ concat (map (derivates cfg) (nonterminals cfg)) where follNTinSF [] sfs = sfs follNTinSF (NonTerminal v':sf) sfs = if v==v' then sf:sfs else follNTinSF sf sfs follNTinSF (_:sf) sfs = follNTinSF sf sfs -- The non-terminals with a production where v is follow by a nullable form follNull :: Eq v => CFG v -> v -> [v] follNull cfg v = [v' | (Production v' sf) <- productions cfg, follNullinSF sf] where follNullinSF [] = False follNullinSF (NonTerminal v'':sf) = if v==v'' then (nullableSF cfg sf) else follNullinSF sf follNullinSF (_:sf) = follNullinSF sf followRel :: Eq v => CFG v -> (v -> [Char]) -> v -> [Char] followRel cfg f = let f' = \v -> nub.concat $ (if v==(start cfg) then ['$'] else []) : (map f (follNull cfg v)) ++ (map (firstSymSF cfg) (follSF cfg v)) in if and [f v == f' v | v <- nonterminals cfg] then f' else followRel cfg f' follow :: Eq v => CFG v -> v -> [Char] follow cfg = followRel cfg (\v -> []) -- All the first characters that can be generated by a production lookahead :: Eq v => CFG v -> Production v -> [Char] lookahead cfg (Production v sf) = if (nullableSF cfg sf) then (follow cfg v)++(firstSymSF cfg sf) else (firstSymSF cfg sf) -- Test if a grammar is LL(1) -- Check if two lists are disjoint disjoint :: Eq v => [v] -> [v] -> Bool disjoint vs1 vs2 = not (or (map (`elem` vs1) vs2)) -- Check if a list of lists is disjoint disjList :: Eq v => [[v]] -> Bool disjList [] = True disjList (vs:vss) = (and (map (disjoint vs) vss)) && disjList vss llone :: Eq v => CFG v -> Bool llone cfg = and $ map (\v -> disjList (map (lookahead cfg) (productionsNT cfg v))) (nonterminals cfg) -- Assume the grammar is LL(1) {- Example of LL(1) grammar S -> AB | C A -> aA | epsilon B -> bB | c C -> eC | d -} cfg2 :: CFG NT1 cfg2 = CFG Snt [Production Snt [NonTerminal Ant, NonTerminal Bnt], Production Snt [NonTerminal Cnt], Production Ant [Terminal 'a', NonTerminal Ant], Production Ant [], Production Bnt [ Terminal 'b', NonTerminal Bnt], Production Bnt [Terminal 'c'], Production Cnt [ Terminal 'e', NonTerminal Cnt], Production Cnt [Terminal 'd']] -- Given a non-terminal and a character, determine the production parseStep :: Eq v => CFG v -> v -> Char -> Maybe [Symbol v] parseStep cfg v c = foldr (\p msf -> if c `elem` (lookahead cfg p) then Just (prodSF p) else msf) Nothing (productionsNT cfg v) -- Parse an initial part of a string from a non-terminal -- and return the remaining of the string parseNT :: Eq v => CFG v -> v -> String -> Maybe String parseNT cfg v "" = if (nullable cfg v) then Just "" else Nothing parseNT cfg v s@(c:_) = case (parseStep cfg v c) of Nothing -> Nothing Just sf -> parseSF cfg sf s parseSF :: Eq v => CFG v -> [Symbol v] -> String -> Maybe String parseSF cfg [] s = Just s parseSF cfg (Terminal c:sf) (c':s') = parseSF cfg sf s' parseSF cfg (NonTerminal v:sf) s = case (parseNT cfg v s) of Nothing -> Nothing Just s' -> parseSF cfg sf s' parseSF cfg _ _ = Nothing -- Determine whether a string is in the language of the grammar parse :: Eq v => CFG v -> String -> Bool parse cfg s = (parseNT cfg (start cfg) s) == Just ""