|
| 1 | +{-# LANGUAGE OverloadedStrings #-} |
| 2 | +{-# LANGUAGE RecordWildCards #-} |
| 3 | +module LambdaComp.Parser |
| 4 | + ( runProgramParser |
| 5 | + ) where |
| 6 | + |
| 7 | +import Control.Monad (void) |
| 8 | +import Control.Monad.Combinators.Expr qualified as Expr |
| 9 | +import Data.Bifunctor (Bifunctor (first)) |
| 10 | +import Data.Char (isAlpha, isAlphaNum) |
| 11 | +import Data.List.NonEmpty (NonEmpty (..)) |
| 12 | +import Data.Semigroup (Any (..)) |
| 13 | +import Data.Set qualified as Set |
| 14 | +import Data.Text (Text) |
| 15 | +import Data.Text qualified as Text |
| 16 | +import Data.Void (Void) |
| 17 | +import Text.Megaparsec |
| 18 | +import Text.Megaparsec.Char qualified as MC |
| 19 | + |
| 20 | +import LambdaComp.Syntax |
| 21 | +import qualified Text.Megaparsec.Char.Lexer as MCL |
| 22 | + |
| 23 | +type Parser = Parsec Void Text |
| 24 | + |
| 25 | +runProgramParser :: String -> Text -> Either String Program |
| 26 | +runProgramParser filename = first errorBundlePretty . runParser program filename |
| 27 | + |
| 28 | +program :: Parser Program |
| 29 | +program = sepEndBy top (symbol ";;") |
| 30 | + |
| 31 | +top :: Parser Top |
| 32 | +top = topTmDef |
| 33 | + |
| 34 | +topTmDef :: Parser Top |
| 35 | +topTmDef = do |
| 36 | + tmDefName <- ident |
| 37 | + symbol ":" |
| 38 | + tmDefType <- tp |
| 39 | + symbol "=" |
| 40 | + tmDefBody <- tm |
| 41 | + pure $ TopTmDef {..} |
| 42 | + |
| 43 | +tp :: Parser Tp |
| 44 | +tp = Expr.makeExprParser atomicTp tpTable |
| 45 | + |
| 46 | +atomicTp :: Parser Tp |
| 47 | +atomicTp = |
| 48 | + choice |
| 49 | + [ TpBool <$ keyword "Bool" |
| 50 | + , TpInt <$ keyword "Int" |
| 51 | + , TpDouble <$ keyword "Double" |
| 52 | + , symbol "(" *> |
| 53 | + (TpUnit <$ symbol ")" |
| 54 | + <|> tp <* symbol ")") |
| 55 | + ] |
| 56 | + |
| 57 | +tpTable :: [[Expr.Operator Parser Tp]] |
| 58 | +tpTable = |
| 59 | + [ [ Expr.InfixR $ TpFun <$ symbol "->" |
| 60 | + ] |
| 61 | + ] |
| 62 | + |
| 63 | +tm :: Parser Tm |
| 64 | +tm = tmLam <|> tmIf <|> tmPrintInt <|> tmRec <|> Expr.makeExprParser atomicTm tmTable |
| 65 | + |
| 66 | +tmLam :: Parser Tm |
| 67 | +tmLam = |
| 68 | + TmLam <$ symbol "\\" |
| 69 | + <*> ident <* symbol "->" |
| 70 | + <*> tm |
| 71 | + |
| 72 | +tmIf :: Parser Tm |
| 73 | +tmIf = |
| 74 | + TmIf <$ keyword "if" |
| 75 | + <*> tm <* keyword "then" |
| 76 | + <*> tm <* keyword "else" |
| 77 | + <*> tm |
| 78 | + |
| 79 | +tmPrintInt :: Parser Tm |
| 80 | +tmPrintInt = |
| 81 | + TmPrintInt <$ keyword "printInt" |
| 82 | + <*> tm <* keyword "before" |
| 83 | + <*> tm |
| 84 | + |
| 85 | +tmRec :: Parser Tm |
| 86 | +tmRec = |
| 87 | + TmRec <$ keyword "rec" |
| 88 | + <*> ident <* symbol "->" |
| 89 | + <*> tm |
| 90 | + |
| 91 | +tmTable :: [[Expr.Operator Parser Tm]] |
| 92 | +tmTable = |
| 93 | + [ [ Expr.Prefix $ TmPrimUnOp PrimINeg <$ symbolNoSpace "-" |
| 94 | + , Expr.Prefix $ TmPrimUnOp PrimDNeg <$ symbolNoSpace "-." |
| 95 | + ] |
| 96 | + , [ Expr.InfixL $ TmApp <$ space |
| 97 | + ] |
| 98 | + , [ Expr.Prefix $ TmPrimUnOp PrimBNot <$ symbol "~" |
| 99 | + ] |
| 100 | + , [ Expr.InfixL $ TmPrimBinOp PrimIMul <$ symbol "*" |
| 101 | + , Expr.InfixL $ TmPrimBinOp PrimIDiv <$ symbol "/" |
| 102 | + , Expr.InfixL $ TmPrimBinOp PrimIMod <$ symbol "%" |
| 103 | + , Expr.InfixL $ TmPrimBinOp PrimDMul <$ symbol "*." |
| 104 | + , Expr.InfixL $ TmPrimBinOp PrimDDiv <$ symbol "/." |
| 105 | + ] |
| 106 | + , [ Expr.InfixL $ TmPrimBinOp PrimIAdd <$ symbol "+" |
| 107 | + , Expr.InfixL $ TmPrimBinOp PrimISub <$ symbol "-" |
| 108 | + , Expr.InfixL $ TmPrimBinOp PrimDAdd <$ symbol "+." |
| 109 | + , Expr.InfixL $ TmPrimBinOp PrimDSub <$ symbol "-." |
| 110 | + ] |
| 111 | + , [ Expr.InfixN $ TmPrimBinOp PrimIEq <$ symbol "=" |
| 112 | + , Expr.InfixN $ TmPrimBinOp PrimINEq <$ symbol "<>" |
| 113 | + , Expr.InfixN $ TmPrimBinOp PrimILt <$ symbol "<" |
| 114 | + , Expr.InfixN $ TmPrimBinOp PrimILe <$ symbol "<=" |
| 115 | + , Expr.InfixN $ TmPrimBinOp PrimIGt <$ symbol ">" |
| 116 | + , Expr.InfixN $ TmPrimBinOp PrimIGe <$ symbol ">=" |
| 117 | + , Expr.InfixN $ TmPrimBinOp PrimDEq <$ symbol "=." |
| 118 | + , Expr.InfixN $ TmPrimBinOp PrimDNEq <$ symbol "<>." |
| 119 | + , Expr.InfixN $ TmPrimBinOp PrimDLt <$ symbol "<." |
| 120 | + , Expr.InfixN $ TmPrimBinOp PrimDLe <$ symbol "<=." |
| 121 | + , Expr.InfixN $ TmPrimBinOp PrimDGt <$ symbol ">." |
| 122 | + , Expr.InfixN $ TmPrimBinOp PrimDGe <$ symbol ">=." |
| 123 | + ] |
| 124 | + , [ Expr.InfixL $ TmPrimBinOp PrimBAnd <$ symbol "&&" |
| 125 | + ] |
| 126 | + , [ Expr.InfixL $ TmPrimBinOp PrimBOr <$ symbol "||" |
| 127 | + ] |
| 128 | + , [ Expr.Postfix $ flip TmAnn <$ symbol ":" <*> tp |
| 129 | + ] |
| 130 | + ] |
| 131 | + |
| 132 | +atomicTm :: Parser Tm |
| 133 | +atomicTm = |
| 134 | + choice |
| 135 | + [ TmTrue <$ keyword "True" |
| 136 | + , TmFalse <$ keyword "False" |
| 137 | + , TmInt <$> int |
| 138 | + , TmDouble <$> double |
| 139 | + , symbol "(" *> |
| 140 | + (TmUnit <$ symbol ")" |
| 141 | + <|> tm <* symbol ")") |
| 142 | + ] |
| 143 | + |
| 144 | +int :: Parser Int |
| 145 | +int = label "integer" . lexeme $ MCL.signed (pure ()) MCL.decimal |
| 146 | + |
| 147 | +double :: Parser Double |
| 148 | +double = label "double precision floating number" . lexeme $ MCL.signed (pure ()) MCL.float |
| 149 | + |
| 150 | +ident :: Parser Ident |
| 151 | +ident = Ident <$> identifier |
| 152 | + |
| 153 | +identifier :: Parser Text |
| 154 | +identifier = label "identifier" $ lexeme $ do |
| 155 | + x <- lookAhead identifierBody |
| 156 | + if x `notElem` keywords |
| 157 | + then identifierBody |
| 158 | + else failure (Just . Label $ 'k' :| ("eyword '" <> Text.unpack x <> "'")) Set.empty |
| 159 | + where |
| 160 | + identifierBody = Text.cons <$> satisfy isIdentChar0 <*> takeWhileP Nothing isIdentChar1 |
| 161 | + |
| 162 | +keyword :: Text -> Parser () |
| 163 | +keyword k = |
| 164 | + label ("keyword '" <> Text.unpack k <> "'") |
| 165 | + $ lexeme |
| 166 | + $ MC.string k >> notFollowedBy (satisfy isIdentChar1) |
| 167 | + |
| 168 | +symbol :: Text -> Parser () |
| 169 | +symbol = void . MCL.symbol space |
| 170 | + |
| 171 | +symbolNoSpace :: Text -> Parser () |
| 172 | +symbolNoSpace = void . MCL.symbol (pure ()) |
| 173 | + |
| 174 | +lexeme :: Parser a -> Parser a |
| 175 | +lexeme = MCL.lexeme space |
| 176 | + |
| 177 | +space :: Parser () |
| 178 | +space = hidden MC.space |
| 179 | + |
| 180 | +keywords :: [Text] |
| 181 | +keywords = |
| 182 | + [ "fun" |
| 183 | + ] |
| 184 | + |
| 185 | +isIdentChar0 :: Char -> Bool |
| 186 | +isIdentChar0 = getAny . ((Any . isAlpha) <> (Any . ('_' ==))) |
| 187 | + |
| 188 | +isIdentChar1 :: Char -> Bool |
| 189 | +isIdentChar1 = getAny . ((Any . isAlphaNum) <> (Any . ('_' ==))) |
0 commit comments