Merge pull request #7 from nihil-lang/develop

Merge working parser into master
zilch-lang · Sep 2, 2020 · 30512e9 · 30512e9
2 parents 0bcb8bb + 75be47d
commit 30512e9
Show file tree

Hide file tree

Showing 33 changed files with 1,242 additions and 68 deletions.
diff --git a/app/Main.hs b/app/Main.hs
@@ -1,6 +1,32 @@
+{-# OPTIONS_GHC -Wno-unused-imports #-}
+
 module Main where
 
-import Lib
+import Language.NStar.Syntax (lexFile, parseFile)
+import Text.Diagnose (printDiagnostic, (<~<))
+import System.IO (stderr, stdout, hPrint, hPutStr)
+import GHC.ResponseFile (getArgsWithResponseFiles)
+import qualified Data.Text as Text
+import qualified Data.Text.IO as Text
 
 main :: IO ()
-main = someFunc
+main = do
+  -- TODO: proper error handling
+  -- TODO: handle command-line arguments correctly (flags, options, etc)
+
+  (file : _) <- getArgsWithResponseFiles
+  content <- Text.readFile file
+
+  tokens <- case lexFile file content of
+    Left diag -> do
+      printDiagnostic stderr (diag <~< (file, lines $ Text.unpack content))
+      error "Lexer failed with exit code -1"
+    Right res -> pure res
+
+  ast <- case parseFile file tokens of
+    Left diag -> do
+      printDiagnostic stderr (diag <~< (file, lines $ Text.unpack content))
+      error "Parser failed with exit code -1"
+    Right res -> pure res
+
+  pure ()
diff --git a/default-extensions.yaml b/default-extensions.yaml
@@ -0,0 +1,3 @@
+- BlockArguments
+- LambdaCase
+- BinaryLiterals
diff --git a/docs/escape-sequences.md b/docs/escape-sequences.md
@@ -0,0 +1,22 @@
+# Escape sequences in N*
+
+Escape sequences are special characters prefixed with `\` that give them special power. Most of the escape sequences are used to input invisible control characters like a line feed or a null character.
+
+Escape sequences in N* follow closely those available in C:
+
+| Escape sequence | Description                                                                                                  |
+|:---------------:|--------------------------------------------------------------------------------------------------------------|
+| `\a`            | Bell, makes an audible ding                                                                                  |
+| `\b`            | Backspace character, go back one character on the display                                                    |
+| `\e`            | Escape character, used in Unix terminals for the ANSI color codes                                            |
+| `\f`            | Form feed page break                                                                                         |
+| `\n`            | Newline, Line feed                                                                                           |
+| `\r`            | Carriage return                                                                                              |
+| `\t`            | Horizontal tab                                                                                               |
+| `\v`            | Vertical tab                                                                                                 |
+| `\\`            | Backslash                                                                                                    |
+| `\'`            | Apostrophe, useful in a character construct `'c'`                                                            |
+| `\"`            | Double quotation mark, useful in a string construct `"..."`<br>Strings are, however, not yet available in N* |
+| `\0`            | Null character                                                                                               |
+
+> TODO: support unicode escape sequences `\uHHHH` and `\uHHHHHHHH`
diff --git a/docs/registers.md b/docs/registers.md
@@ -0,0 +1,30 @@
+<!-- markdown-toc start - Don't edit this section. Run M-x markdown-toc-refresh-toc -->
+**Table of Contents**
+
+- [Registers in different architectures](#registers-in-different-architectures)
+    - [Registers available in architectures](#registers-available-in-architectures)
+        - [x64](#x64)
+
+<!-- markdown-toc end -->
+
+
+# Registers in different architectures
+
+Registers are fixed based on which architecture you are targetting. When the number of bits increases, the number of registers to use should also be higher.
+
+## Registers available in architectures
+
+### x64
+
+| Register name | Size (B) |
+|:-------------:|:--------:|
+| rax           | 8        |
+| rbx           | 8        |
+| rcx           | 8        |
+| rdx           | 8        |
+| rsi           | 8        |
+| rdi           | 8        |
+| rsp           | 8        |
+| rbp           | 8        |
+
+> TODO: support more architectures, and more registers
diff --git a/lib/nsc-core/nsc-core.cabal b/lib/nsc-core/nsc-core.cabal
@@ -0,0 +1,26 @@
+cabal-version: 1.12
+
+-- This file has been generated from package.yaml by hpack version 0.34.2.
+--
+-- see: https://github.com/sol/hpack
+
+name:           nsc-core
+version:        0.0.1.0
+build-type:     Simple
+
+library
+  exposed-modules:
+      Data.Located
+      Language.NStar.Syntax.Core
+      Language.NStar.Typechecker.Core
+  other-modules:
+      Paths_nsc_core
+  hs-source-dirs:
+      src
+  default-extensions: BlockArguments LambdaCase BinaryLiterals
+  build-depends:
+      base >=4.7 && <5
+    , containers >=0.6
+    , diagnose
+    , text >=1.2 && <1.4
+  default-language: Haskell2010
diff --git a/lib/nsc-core/package.yaml b/lib/nsc-core/package.yaml
@@ -0,0 +1,13 @@
+name: nsc-core
+version: !include "../../version.yaml"
+
+library:
+  source-dirs: src
+
+default-extensions: !include "../../default-extensions.yaml"
+
+dependencies:
+- base >=4.7 && <5
+- diagnose
+- text >=1.2 && <1.4
+- containers >=0.6
diff --git a/lib/nsc-core/src/Data/Located.hs b/lib/nsc-core/src/Data/Located.hs
@@ -0,0 +1,24 @@
+module Data.Located
+( module Text.Diagnose.Position
+, Located(..)
+, unLoc
+) where
+
+import Text.Diagnose.Position
+
+-- | A simple data type to hold some location to keep track off.
+data Located a
+  = a :@ Position
+  deriving (Show)
+
+infix 3 :@
+
+instance Eq (Located a) where
+  (_ :@ p1) == (_ :@ p2) = p1 == p2
+
+instance Ord (Located a) where
+  (_ :@ p1) <= (_ :@ p2) = p1 <= p2
+
+-- | Removes extra position information bundled with a value.
+unLoc :: Located a -> a
+unLoc ~(x :@ _) = x
diff --git a/lib/nsc-core/src/Language/NStar/Syntax/Core.hs b/lib/nsc-core/src/Language/NStar/Syntax/Core.hs
@@ -0,0 +1,198 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+
+{-|
+  Module: Language.NStar.Syntax.Core
+  Description: NStar's syntactic core language
+  Copyright: (c) Mesabloo, 2020
+  License: BSD3
+  Stability: experimental
+
+  This module contains all the definitions of all AST nodes and Tokens that will be
+  used across the whole compiler.
+
+  The translation from the source code to the syntactic core should be almost a 1:1 conversion,
+  so a program can be printed back as it was. Both lexing and parsing steps must be reversible steps.
+-}
+
+module Language.NStar.Syntax.Core
+where
+
+import Data.Located
+import Data.Text (Text)
+import Data.Map (Map)
+import Numeric.Natural (Natural)
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+
+newtype Program
+  = Program [Located Statement]  -- ^ A program is a possibily empty list of statements
+
+-- | A statement is either
+data Statement where
+  -- | A typed label
+  Label :: Located Text           -- ^ The label's name. It may not be empty
+        -> Located Type           -- ^ The "label's type", describing the minimal type expected when jumping to this label
+        -> Statement
+  -- | An instruction call
+  Instr :: Instruction -> Statement
+
+data Type where
+  -- | Signed integer
+  Signed :: Natural                                        -- ^ The size of the integer (a multiple of 2 greater than 4)
+         -> Type
+  -- | Unsigned integer
+  Unsigned :: Natural                                      -- ^ The size of the integer (a multiple of 2 greater than 4)
+           -> Type
+  -- | Stack constructor
+  Cons :: Located Type                                     -- ^ Stack head
+       -> Located Type                                     -- ^ Stack tail
+       -> Type
+  -- | Type variable
+  Var :: Located Text                                      -- ^ The name of the type variable
+      -> Type
+  -- | Record type
+  Record :: Map (Located Register) (Located Type)          -- ^ A mapping from 'Register's to their expected 'Type's
+         -> Type
+  -- | Pointer to a normal type
+  Ptr :: Located Type
+      -> Type
+  -- | Pointer to a stack type
+  --
+  -- We separate stack pointers from normal pointers
+  -- because they are used as a different construct
+  -- in the source code (@*ty@ vs @sptr sty@)
+  SPtr :: Located Type
+       -> Type
+  -- | Forall type variable binder
+  ForAll :: [(Located Type, Located Kind)]                  -- ^ Variables along with their 'Kind's
+         -> Located Type
+         -> Type
+
+data Kind where
+  -- | Kind of 8-bytes big types
+  T8 :: Kind
+  -- | Kind of stack types
+  Ts :: Kind
+  -- | Kind of unsized types
+  Ta :: Kind
+
+data Register where
+  -- | 64 bits single-value registers
+  RAX, RBX, RCX, RDX, RDI, RSI, RBP :: Register
+  -- | 64 bits stack registers
+  RSP :: Register
+  {- We do not permit using registers like %rip, %flags, etc.
+     Those are used internally by some instructions.
+  -}
+
+-- | N*'s instruction set
+data Instruction where
+  -- | @mov a, v@ is the same as @a <- v@.
+  MOV :: Located Expr         -- ^ The destination of the move. It must be addressable
+      -> Located Expr         -- ^ The source value moved into the destination
+      -> Instruction
+  -- | @ret@ returns the value in @'RAX'@ to the caller.
+  RET :: Instruction
+
+  -- TODO: add more instructions
+
+data Expr where
+  -- | An immediate value (@$⟨val⟩@)
+  Imm :: Located Immediate          -- ^ \- @⟨val⟩@
+      -> Expr
+  -- | A label name
+  Name :: Located Text
+       -> Expr
+  -- | An indexed expression (@⟨idx⟩[⟨expr⟩]@)
+  Indexed :: Located Integer        -- ^ \- @⟨idx⟩@
+          -> Located Expr           -- ^ \- @⟨expr⟩@
+          -> Expr
+  -- | A register (one of the available 'Register's)
+  Reg :: Located Register
+      -> Expr
+  -- | Type specialization (used on a @call@) (@⟨expr⟩\<⟨type⟩\>@)
+  Spec :: Located Expr             -- ^ \- @⟨expr⟩@
+       -> Located Type             -- ^ \- @⟨type⟩@
+       -> Expr
+
+data Immediate where
+  -- | An integer, either in decimal, hexadecimal, octal or binary format
+  --
+  -- Grammars are:
+  --
+  -- * Binary: @0(b|B)(0|1)⁺@
+  -- * Octal: @0(o|O)(0..7)⁺@
+  -- * Decimal: @(0..9)⁺@
+  -- * Hexadecimal: @0(x|X)(0..9|A..F|a..f)⁺@
+  I :: Integer -> Immediate
+  -- | A character (which can be an escape sequence)
+  C :: Char -> Immediate
+
+
+------------------------------------------------------------------------------------------------
+
+data Token where
+  -- Literals
+  -- | A literal integer
+  Integer :: Text -> Token
+  -- | A literal character
+  Char :: Char -> Token
+  -- | An identifier (also called name)
+  Id :: Text -> Token
+  -- Registers
+  -- | Registers reserved words
+  Rax, Rbx, Rcx, Rdx, Rdi, Rsi, Rsp, Rbp :: Token
+  -- Instructions
+  -- | The @mov@ instruction
+  Mov :: Token
+  -- | The @ret@ instruction
+  Ret :: Token
+  -- TODO: add more instructions
+  -- Symbols
+  -- | Opening symbols @(@, @[@, @{@ and @\<@
+  LParen, LBrace, LBracket, LAngle :: Token
+  -- | Closing symbols @)@, @]@, @}@ and @\>@
+  RParen, RBrace, RBracket, RAngle :: Token
+  -- | Pointer quantifier "@*@"
+  Star :: Token
+  -- | Literal quantifier "@$@"
+  Dollar :: Token
+  -- | Register quantifier "@%@"
+  Percent :: Token
+  -- | Separator "@,@"
+  Comma :: Token
+  -- | Separator "@:@"
+  Colon :: Token
+  -- | Separator "@::@"
+  DoubleColon :: Token
+  -- | Separator "@.@"
+  Dot :: Token
+  -- | Negation "@-@"
+  Minus :: Token
+  -- Keywords
+  -- | \"@forall@\" type variable binder in type
+  Forall :: Token
+  -- | \"@sptr@\" stack pointer quantifier
+  Sptr :: Token
+  -- Comments
+  -- | A comment starting with "@#@" and spanning until the end of the current line
+  InlineComment :: Text        -- ^ The content of the comment
+                -> Token
+  -- | A comment starting with "@/\*@" and ending with "@\*/@"
+  MultilineComment :: Text     -- ^ The content of the comment
+                   -> Token
+  -- | End Of Line
+  EOL :: Token
+  -- | End Of File
+  EOF :: Token
+
+deriving instance Show Token
+deriving instance Eq Token
+deriving instance Ord Token
+deriving instance Data Token
+deriving instance Typeable Token
+
+-- | A token with some location information attached
+type LToken = Located Token
diff --git a/lib/nsc-core/src/Language/NStar/Typechecker/Core.hs b/lib/nsc-core/src/Language/NStar/Typechecker/Core.hs
@@ -0,0 +1,14 @@
+{-|
+  Module: Language.NStar.Typechecker.Core
+  Description: NStar's typechecking core language
+  Copyright: (c) Mesabloo, 2020
+  License: BSD3
+  Stability: experimental
+-}
+
+module Language.NStar.Typechecker.Core
+( -- * Re-exports
+  module Language.NStar.Syntax.Core
+) where
+
+import Language.NStar.Syntax.Core (Type(..), Kind(..))