Skip to content

Commit

Permalink
Merge pull request #7 from nihil-lang/develop
Browse files Browse the repository at this point in the history
Merge working parser into master
  • Loading branch information
Mesabloo authored Sep 2, 2020
2 parents 0bcb8bb + 75be47d commit 30512e9
Show file tree
Hide file tree
Showing 33 changed files with 1,242 additions and 68 deletions.
30 changes: 28 additions & 2 deletions app/Main.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,32 @@
{-# OPTIONS_GHC -Wno-unused-imports #-}

module Main where

import Lib
import Language.NStar.Syntax (lexFile, parseFile)
import Text.Diagnose (printDiagnostic, (<~<))
import System.IO (stderr, stdout, hPrint, hPutStr)
import GHC.ResponseFile (getArgsWithResponseFiles)
import qualified Data.Text as Text
import qualified Data.Text.IO as Text

main :: IO ()
main = someFunc
main = do
-- TODO: proper error handling
-- TODO: handle command-line arguments correctly (flags, options, etc)

(file : _) <- getArgsWithResponseFiles
content <- Text.readFile file

tokens <- case lexFile file content of
Left diag -> do
printDiagnostic stderr (diag <~< (file, lines $ Text.unpack content))
error "Lexer failed with exit code -1"
Right res -> pure res

ast <- case parseFile file tokens of
Left diag -> do
printDiagnostic stderr (diag <~< (file, lines $ Text.unpack content))
error "Parser failed with exit code -1"
Right res -> pure res

pure ()
3 changes: 3 additions & 0 deletions default-extensions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
- BlockArguments
- LambdaCase
- BinaryLiterals
22 changes: 22 additions & 0 deletions docs/escape-sequences.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Escape sequences in N*

Escape sequences are special characters prefixed with `\` that give them special power. Most of the escape sequences are used to input invisible control characters like a line feed or a null character.

Escape sequences in N* follow closely those available in C:

| Escape sequence | Description |
|:---------------:|--------------------------------------------------------------------------------------------------------------|
| `\a` | Bell, makes an audible ding |
| `\b` | Backspace character, go back one character on the display |
| `\e` | Escape character, used in Unix terminals for the ANSI color codes |
| `\f` | Form feed page break |
| `\n` | Newline, Line feed |
| `\r` | Carriage return |
| `\t` | Horizontal tab |
| `\v` | Vertical tab |
| `\\` | Backslash |
| `\'` | Apostrophe, useful in a character construct `'c'` |
| `\"` | Double quotation mark, useful in a string construct `"..."`<br>Strings are, however, not yet available in N* |
| `\0` | Null character |

> TODO: support unicode escape sequences `\uHHHH` and `\uHHHHHHHH`
30 changes: 30 additions & 0 deletions docs/registers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!-- markdown-toc start - Don't edit this section. Run M-x markdown-toc-refresh-toc -->
**Table of Contents**

- [Registers in different architectures](#registers-in-different-architectures)
- [Registers available in architectures](#registers-available-in-architectures)
- [x64](#x64)

<!-- markdown-toc end -->


# Registers in different architectures

Registers are fixed based on which architecture you are targetting. When the number of bits increases, the number of registers to use should also be higher.

## Registers available in architectures

### x64

| Register name | Size (B) |
|:-------------:|:--------:|
| rax | 8 |
| rbx | 8 |
| rcx | 8 |
| rdx | 8 |
| rsi | 8 |
| rdi | 8 |
| rsp | 8 |
| rbp | 8 |

> TODO: support more architectures, and more registers
26 changes: 26 additions & 0 deletions lib/nsc-core/nsc-core.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
cabal-version: 1.12

-- This file has been generated from package.yaml by hpack version 0.34.2.
--
-- see: https://github.com/sol/hpack

name: nsc-core
version: 0.0.1.0
build-type: Simple

library
exposed-modules:
Data.Located
Language.NStar.Syntax.Core
Language.NStar.Typechecker.Core
other-modules:
Paths_nsc_core
hs-source-dirs:
src
default-extensions: BlockArguments LambdaCase BinaryLiterals
build-depends:
base >=4.7 && <5
, containers >=0.6
, diagnose
, text >=1.2 && <1.4
default-language: Haskell2010
13 changes: 13 additions & 0 deletions lib/nsc-core/package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: nsc-core
version: !include "../../version.yaml"

library:
source-dirs: src

default-extensions: !include "../../default-extensions.yaml"

dependencies:
- base >=4.7 && <5
- diagnose
- text >=1.2 && <1.4
- containers >=0.6
24 changes: 24 additions & 0 deletions lib/nsc-core/src/Data/Located.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
module Data.Located
( module Text.Diagnose.Position
, Located(..)
, unLoc
) where

import Text.Diagnose.Position

-- | A simple data type to hold some location to keep track off.
data Located a
= a :@ Position
deriving (Show)

infix 3 :@

instance Eq (Located a) where
(_ :@ p1) == (_ :@ p2) = p1 == p2

instance Ord (Located a) where
(_ :@ p1) <= (_ :@ p2) = p1 <= p2

-- | Removes extra position information bundled with a value.
unLoc :: Located a -> a
unLoc ~(x :@ _) = x
198 changes: 198 additions & 0 deletions lib/nsc-core/src/Language/NStar/Syntax/Core.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE DeriveDataTypeable #-}

{-|
Module: Language.NStar.Syntax.Core
Description: NStar's syntactic core language
Copyright: (c) Mesabloo, 2020
License: BSD3
Stability: experimental
This module contains all the definitions of all AST nodes and Tokens that will be
used across the whole compiler.
The translation from the source code to the syntactic core should be almost a 1:1 conversion,
so a program can be printed back as it was. Both lexing and parsing steps must be reversible steps.
-}

module Language.NStar.Syntax.Core
where

import Data.Located
import Data.Text (Text)
import Data.Map (Map)
import Numeric.Natural (Natural)
import Data.Data (Data)
import Data.Typeable (Typeable)

newtype Program
= Program [Located Statement] -- ^ A program is a possibily empty list of statements

-- | A statement is either
data Statement where
-- | A typed label
Label :: Located Text -- ^ The label's name. It may not be empty
-> Located Type -- ^ The "label's type", describing the minimal type expected when jumping to this label
-> Statement
-- | An instruction call
Instr :: Instruction -> Statement

data Type where
-- | Signed integer
Signed :: Natural -- ^ The size of the integer (a multiple of 2 greater than 4)
-> Type
-- | Unsigned integer
Unsigned :: Natural -- ^ The size of the integer (a multiple of 2 greater than 4)
-> Type
-- | Stack constructor
Cons :: Located Type -- ^ Stack head
-> Located Type -- ^ Stack tail
-> Type
-- | Type variable
Var :: Located Text -- ^ The name of the type variable
-> Type
-- | Record type
Record :: Map (Located Register) (Located Type) -- ^ A mapping from 'Register's to their expected 'Type's
-> Type
-- | Pointer to a normal type
Ptr :: Located Type
-> Type
-- | Pointer to a stack type
--
-- We separate stack pointers from normal pointers
-- because they are used as a different construct
-- in the source code (@*ty@ vs @sptr sty@)
SPtr :: Located Type
-> Type
-- | Forall type variable binder
ForAll :: [(Located Type, Located Kind)] -- ^ Variables along with their 'Kind's
-> Located Type
-> Type

data Kind where
-- | Kind of 8-bytes big types
T8 :: Kind
-- | Kind of stack types
Ts :: Kind
-- | Kind of unsized types
Ta :: Kind

data Register where
-- | 64 bits single-value registers
RAX, RBX, RCX, RDX, RDI, RSI, RBP :: Register
-- | 64 bits stack registers
RSP :: Register
{- We do not permit using registers like %rip, %flags, etc.
Those are used internally by some instructions.
-}

-- | N*'s instruction set
data Instruction where
-- | @mov a, v@ is the same as @a <- v@.
MOV :: Located Expr -- ^ The destination of the move. It must be addressable
-> Located Expr -- ^ The source value moved into the destination
-> Instruction
-- | @ret@ returns the value in @'RAX'@ to the caller.
RET :: Instruction

-- TODO: add more instructions

data Expr where
-- | An immediate value (@$⟨val⟩@)
Imm :: Located Immediate -- ^ \- @⟨val⟩@
-> Expr
-- | A label name
Name :: Located Text
-> Expr
-- | An indexed expression (@⟨idx⟩[⟨expr⟩]@)
Indexed :: Located Integer -- ^ \- @⟨idx⟩@
-> Located Expr -- ^ \- @⟨expr⟩@
-> Expr
-- | A register (one of the available 'Register's)
Reg :: Located Register
-> Expr
-- | Type specialization (used on a @call@) (@⟨expr⟩\<⟨type⟩\>@)
Spec :: Located Expr -- ^ \- @⟨expr⟩@
-> Located Type -- ^ \- @⟨type⟩@
-> Expr

data Immediate where
-- | An integer, either in decimal, hexadecimal, octal or binary format
--
-- Grammars are:
--
-- * Binary: @0(b|B)(0|1)⁺@
-- * Octal: @0(o|O)(0..7)⁺@
-- * Decimal: @(0..9)⁺@
-- * Hexadecimal: @0(x|X)(0..9|A..F|a..f)⁺@
I :: Integer -> Immediate
-- | A character (which can be an escape sequence)
C :: Char -> Immediate


------------------------------------------------------------------------------------------------

data Token where
-- Literals
-- | A literal integer
Integer :: Text -> Token
-- | A literal character
Char :: Char -> Token
-- | An identifier (also called name)
Id :: Text -> Token
-- Registers
-- | Registers reserved words
Rax, Rbx, Rcx, Rdx, Rdi, Rsi, Rsp, Rbp :: Token
-- Instructions
-- | The @mov@ instruction
Mov :: Token
-- | The @ret@ instruction
Ret :: Token
-- TODO: add more instructions
-- Symbols
-- | Opening symbols @(@, @[@, @{@ and @\<@
LParen, LBrace, LBracket, LAngle :: Token
-- | Closing symbols @)@, @]@, @}@ and @\>@
RParen, RBrace, RBracket, RAngle :: Token
-- | Pointer quantifier "@*@"
Star :: Token
-- | Literal quantifier "@$@"
Dollar :: Token
-- | Register quantifier "@%@"
Percent :: Token
-- | Separator "@,@"
Comma :: Token
-- | Separator "@:@"
Colon :: Token
-- | Separator "@::@"
DoubleColon :: Token
-- | Separator "@.@"
Dot :: Token
-- | Negation "@-@"
Minus :: Token
-- Keywords
-- | \"@forall@\" type variable binder in type
Forall :: Token
-- | \"@sptr@\" stack pointer quantifier
Sptr :: Token
-- Comments
-- | A comment starting with "@#@" and spanning until the end of the current line
InlineComment :: Text -- ^ The content of the comment
-> Token
-- | A comment starting with "@/\*@" and ending with "@\*/@"
MultilineComment :: Text -- ^ The content of the comment
-> Token
-- | End Of Line
EOL :: Token
-- | End Of File
EOF :: Token

deriving instance Show Token
deriving instance Eq Token
deriving instance Ord Token
deriving instance Data Token
deriving instance Typeable Token

-- | A token with some location information attached
type LToken = Located Token
14 changes: 14 additions & 0 deletions lib/nsc-core/src/Language/NStar/Typechecker/Core.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{-|
Module: Language.NStar.Typechecker.Core
Description: NStar's typechecking core language
Copyright: (c) Mesabloo, 2020
License: BSD3
Stability: experimental
-}

module Language.NStar.Typechecker.Core
( -- * Re-exports
module Language.NStar.Syntax.Core
) where

import Language.NStar.Syntax.Core (Type(..), Kind(..))
Loading

0 comments on commit 30512e9

Please sign in to comment.