From 64aebcd936a1be5d89ea8727cdbb50100f45b886 Mon Sep 17 00:00:00 2001
From: dylan
Date: Thu, 5 May 2022 18:09:21 +0800
Subject: [PATCH 1/5] docs: add zero-api-sepc.md

---
 doc/apispec.md | 326 +++++++++++++++++++++++++++++++++++++++++++++++++
 go.mod         |   1 +
 token/token.go | 100 +++++++++++++++
 token/types.go |  77 ++++++++++++
 4 files changed, 504 insertions(+)
 create mode 100644 doc/apispec.md
 create mode 100644 token/token.go
 create mode 100644 token/types.go

diff --git a/doc/apispec.md b/doc/apispec.md
new file mode 100644
index 0000000..c4f6f34
--- /dev/null
+++ b/doc/apispec.md
@@ -0,0 +1,326 @@
+[TOC]
+
+# Introduction
+zero-api is a language for declaring HTTP APIs. It can be translated by goctl into service code based on go-zero.
+
+# Notation
+The syntax is specified using Extended Backus-Naur Form (EBNF). For example:
+
+```EBNF
+Production = production_name "=" [ Expression ] "." .
+Expression = Alternative { "|" Alternative } .
+Alternative = Term { Term } .
+Term = production_name | token [ "…" token ] | Group | Option | Repetition .
+Group = "(" Expression ")" .
+Option = "[" Expression "]" .
+Repetition = "{" Expression "}" .
+```
+
+Productions are expressions constructed from terms and the following operators, in increasing precedence:
+
+```EBNF
+|   alternation
+()  grouping
+[]  option (0 or 1 times)
+{}  repetition (0 to n times)
+```
+
+# Source code representation
+Source code must be encoded in UTF-8. The language is case-sensitive.
+
+## Characters
+The following terms are used to denote specific Unicode character classes:
+
+```EBNF
+newline        = /* the Unicode code point U+000A */ .
+unicode_char   = /* an arbitrary Unicode code point except newline */ .
+unicode_letter = /* a Unicode code point classified as "Letter" */ .
+unicode_digit  = /* a Unicode code point classified as "Number, decimal digit" */ .
+```
+
+## Letters and digits
+The underscore character _ (U+005F) is considered a letter.
+
+```EBNF
+letter = unicode_letter | "_" .
+decimal_digit = "0" … "9" .
+binary_digit = "0" | "1" .
+octal_digit = "0" … "7" .
+hex_digit = "0" … "9" | "A" … "F" | "a" … "f" .
+```
+
+# Lexical elements
+## Comments
+Comments serve as documentation and come in one form:
+
+1. Line comments start with **//** and stop at the end of the line.
+
+A comment cannot start inside a **rune** or **string literal**.
+
+## Tokens
+Tokens form the vocabulary of the API language. There are four classes: identifiers, keywords, operators and punctuation, and literals. White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage returns (U+000D), and newlines (U+000A), is ignored except as it separates tokens that would otherwise combine into a single token. Also, a newline or end of file may trigger the insertion of a semicolon. While breaking the input into tokens, the next token is the longest sequence of characters that form a valid token.
+
+## Semicolons
+The formal grammar uses semicolons **;** as terminators in a number of productions.
+
+zero-api omits semicolons according to the following rules:
+1. When the input is broken into tokens, a semicolon is automatically inserted after a line's final token if that token is one of the following tokens
+2. To allow complex statements to occupy a single line, a semicolon may also be omitted before a closing ) or }
+
+To reflect idiomatic use, code examples in this document elide semicolons using these rules.
+
+## Identifiers
+Identifiers name program entities such as variables and types. An identifier is a sequence of one or more letters and digits. The first character in an identifier must be a letter.
+
+```EBNF
+identifier = letter { letter | unicode_digit } .
+```
+
+```EBNF_DEMO
+a
+_x9
+ThisVariableIsExported
+αβ
+```
+
+Some identifiers are predeclared.
+
+## String literals
+A string literal represents a string constant obtained from concatenating a sequence of characters. There are two forms: raw string literals and interpreted string literals.
+
+```EBNF
+string_lit = raw_string_lit | interpreted_string_lit | identifier .
+raw_string_lit = "`" { unicode_char | newline } "`" .
+interpreted_string_lit = `"` { unicode_value | byte_value } `"` .
+```
+
+```EBNF_DEMO
+`abc`                // same as "abc"
+`\n
+\n`                  // same as "\\n\n\\n"
+"\n"
+"\""                 // same as `"`
+"Hello, world!\n"
+"日本語"
+"\u65e5本\U00008a9e"
+"\xff\u00FF"
+"\uD800"             // illegal: surrogate half
+"\U00110000"         // illegal: invalid Unicode code point
+```
+
+## zero-api value strings
+zero-api supports a class of special string values, which may begin with a special character, a digit, and so on.
+```EBNF
+value_string_lit = unicode_char { unicode_char } .
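+/* a value string is terminated by white space or a punctuation character */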
+```
+
+```EBNF_DEMO
+3s
+abc
+/api/user/:info
+```
+
+TODO:
+For
+/api/user/:info(req)
+
+this input must be tokenized into the tokens **/api/user/:info**, **(**, **)** and **req**.
+
+## Predeclared identifiers
+The following are the predeclared identifiers supported by zero-api
+
+```EBNF
+Types:
+bool float32 float64 int int8 int16 int32 int64
+string uint uint8 uint16 uint32 uint64
+
+```
+
+```EBNF_DEMO
+type User {
+    Id string
+    Age int
+    Name string
+}
+```
+
+# Source file organization
+Each source file consists of the following:
+
+```EBNF
+SourceFile = SyntaxDecl ";" { ImportDecl ";" } [ InfoDecl ";" ] { ( TypeDecl | ServiceDecl ) ";" } .
+```
+
+## Syntax version (syntax)
+The syntax declaration controls the grammar version of the API file.
+
+```EBNF
+SyntaxDecl = "syntax" "=" SyntaxName .
+SyntaxName = string_lit .
+```
+
+A syntax version example:
+```EBNF_DEMO
+syntax = "v1"
+```
+
+## Import declarations
+An import declaration is used when the current API file imports other API files.
+
+```EBNF
+ImportDecl = "import" ( ImportPath | "(" { ImportPath ";" } ")" ) .
+ImportPath = string_lit .
+```
+## Info declaration
+An info declaration declares additional information about the API.
+```EBNF
+InfoDecl = "info" "(" { InfoElement ";" } ")" .
+InfoElement = identifier ":" ( string_lit | identifier ) .
+```
+
+```EBNF_DEMO
+info (
+    auth: dylan
+    desc: `abc
+def`
+)
+```
+
+## Type declarations
+A type declaration binds an identifier, the type name, to a type. Currently the API language supports only type definitions.
+
+```EBNF
+TypeDecl = "type" ( TypeDef | "(" { TypeDef ";" } ")" ) .
+TypeDef = identifier Type .
+```
+
+# Types
+A type determines a set of values.
+```EBNF
+Type = TypeName | TypeLit | "(" Type ")" .
+TypeName = identifier .
+TypeLit = ArrayType | StructType | PointerType | SliceType | MapType .
+```
+
+The language predeclares certain type names. Other named types are introduced with type declarations. Composite types (array, struct, pointer, slice, and map types) may be constructed using type literals.
+
+## Boolean types
+A boolean type represents the set of Boolean truth values denoted by the predeclared constants true and false. The predeclared boolean type is **bool**; it is a defined type.
+
+## Numeric types
+An integer or floating-point type represents the set of integer or floating-point values, respectively. They are collectively called numeric types.
+
+```EBNF
+uint8   the set of all unsigned  8-bit integers (0 to 255)
+uint16  the set of all unsigned 16-bit integers (0 to 65535)
+uint32  the set of all unsigned 32-bit integers (0 to 4294967295)
+uint64  the set of all unsigned 64-bit integers (0 to 18446744073709551615)
+
+int8    the set of all signed  8-bit integers (-128 to 127)
+int16   the set of all signed 16-bit integers (-32768 to 32767)
+int32   the set of all signed 32-bit integers (-2147483648 to 2147483647)
+int64   the set of all signed 64-bit integers (-9223372036854775808 to 9223372036854775807)
+
+float32 the set of all IEEE-754 32-bit floating-point numbers
+float64 the set of all IEEE-754 64-bit floating-point numbers
+```
+
+There is also a set of predeclared integer types whose sizes are implementation-specific:
+```EBNF
+uint  either 32 or 64 bits
+int   same size as uint
+```
+
+## Struct types
+A struct is a sequence of named elements, called fields, each of which has a name and a type. Field names may be specified explicitly (IdentifierList) or implicitly (EmbeddedField). Within a struct, non-blank field names must be unique.
+```EBNF
+StructType = "{" { FieldDecl ";" } "}" .
+FieldDecl = ( identifier Type | EmbeddedField ) [ Tag ] .
+EmbeddedField = [ "*" ] TypeName .
+Tag = string_lit .
+```
+
+// TODO: the EmbeddedField definition is not precise enough
+
+```EBNF_DEMO
+type Foo {
+    Stude {
+        Name string
+    }
+    Foo Foo
+}
+```
+
+## Pointer types
+A pointer type denotes the set of all pointers to variables of a given type, called the base type of the pointer.
+```EBNF
+PointerType = "*" Type .
+```
+
+## Array types
+An array is a numbered sequence of elements of a single type, called the element type. The number of elements is called the length of the array and is never negative.
+
+```EBNF
+ArrayType = "[" ArrayLength "]" Type .
+ArrayLength = Expression .
+```
+
+## Slice types
+A slice is a descriptor for a contiguous segment of an underlying array and provides access to a numbered sequence of elements from that array.
+A slice type denotes the set of all slices of arrays of its element type. The number of elements is called the length of the slice and is never negative. The value of an uninitialized slice is nil.
+```EBNF
+SliceType = "[" "]" Type .
+```
+
+```EBNF_DEMO
+type Foo {
+    Arr []string
+}
+```
+
+## Map types
+A map is an unordered group of elements of one type, called the element type, indexed by a set of unique keys of another type, called the key type.
+
+```EBNF
+MapType = "map" "[" Type "]" Type .
+```
+
+# Service definition
+A service defines the routes of the API. One API file may contain multiple service blocks, but all of them must use the same service name.
+```EBNF
+ServiceDecl = [ ServiceExtDecl ] ServiceBody .
+
+ServiceBody = "service" value_string_lit "{" { RouteDecl } "}" .
+RouteDecl = [ Doc ";" ] Handler ";" Method Path [ Request ] [ Response ] .
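+/* Doc, Request and Response may be omitted; Method and Path are required */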
+Doc = "@doc" string_lit .
+Handler = "@handler" identifier .
+Method = "get" | "post" | "put" | "head" | "options" | "delete" | "patch" .
+Path = value_string_lit .
+Request = "(" identifier ")" .
+Response = "returns" "(" identifier ")" .
+```
+
+```EBNF_DEMO
+service user-api {
+    @doc ""
+    @handler GetUserInfo
+    get /api/user/info(req) returns (resp)
+}
+```
+
+TODO: parentheses around Response
+
+## Service extension definition
+Extension information for a service is declared with @server.
+```EBNF
+ServiceExtDecl = "@server" "(" { ServiceExtElement } ")" .
+ServiceExtElement = identifier ":" ( value_string_lit | string_lit | identifier ) .
+```
+
+```EBNF_DEMO
+@server (
+    jwt: auth
+    timeout: 3s
+)
+```
+
diff --git a/go.mod b/go.mod
index 9cc6e81..e4a83bf 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,4 @@
 module github.com/zeromicro/zero-api
 
 go 1.15
+
diff --git a/token/token.go b/token/token.go
new file mode 100644
index 0000000..ad9955d
--- /dev/null
+++ b/token/token.go
@@ -0,0 +1,100 @@
+package token
+
+import (
+	"go/token"
+	"strconv"
+)
+
+// Token is the set of lexical tokens of the zero-api programming language.
+type Token token.Token
+
+// The list of tokens.
+const (
+	// Special tokens
+
+	ILLEGAL Token = iota
+	EOF
+	COMMENT
+
+	literal_beg
+	IDENT
+
+	STRING
+	VALUE_STRING
+	literal_end
+
+	operator_beg
+	LPAREN // (
+	LBRACK // [
+	LBRACE // {
+	COMMA  // ,
+	PERIOD // .
+
+	RPAREN    // )
+	RBRACK    // ]
+	RBRACE    // }
+	SEMICOLON // ;
+	COLON     // :
+	operator_end
+
+	additional_beg
+	// additional tokens, handled in an ad-hoc manner
+
+	TILDE
+	additional_end
+)
+
+var tokens = [...]string{
+	ILLEGAL: "ILLEGAL",
+
+	EOF:          "EOF",
+	COMMENT:      "COMMENT",
+	IDENT:        "IDENT",
+	STRING:       "STRING",
+	VALUE_STRING: "VALUE_STRING",
+
+	LPAREN: "(",
+	LBRACK: "[",
+	LBRACE: "{",
+	COMMA:  ",",
+	PERIOD: ".",
+
+	RPAREN:    ")",
+	RBRACK:    "]",
+	RBRACE:    "}",
+	SEMICOLON: ";",
+	COLON:     ":",
+
+	TILDE: "~",
+}
+
+// String returns the string corresponding to the token tok.
+// For operators, delimiters, and keywords the string is the actual
+// token character sequence (e.g., for the token COMMA, the string is
+// ","). For all other tokens the string corresponds to the token
+// constant name (e.g. for the token IDENT, the string is "IDENT").
+//
+func (tok Token) String() string {
+	s := ""
+	if 0 <= tok && tok < Token(len(tokens)) {
+		s = tokens[tok]
+	}
+	if s == "" {
+		s = "token(" + strconv.Itoa(int(tok)) + ")"
+	}
+	return s
+}
+
+// Predicates
+
+// IsLiteral returns true for tokens corresponding to identifiers
+// and basic type literals; it returns false otherwise.
+//
+func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end }
+
+// IsOperator returns true for tokens corresponding to operators and
+// delimiters; it returns false otherwise.
+//
+func (tok Token) IsOperator() bool {
+	return (operator_beg < tok && tok < operator_end) || tok == TILDE
+}
diff --git a/token/types.go b/token/types.go
new file mode 100644
index 0000000..ed3e4b7
--- /dev/null
+++ b/token/types.go
@@ -0,0 +1,77 @@
+package token
+
+import "go/token"
+
+type (
+	// Pos is a compact encoding of a source position within a file set.
+	// It can be converted into a Position for a more convenient, but much
+	// larger, representation.
+	//
+	// The Pos value for a given file is a number in the range [base, base+size],
+	// where base and size are specified when a file is added to the file set.
+ // The difference between a Pos value and the corresponding file base + // corresponds to the byte offset of that position (represented by the Pos value) + // from the beginning of the file. Thus, the file base offset is the Pos value + // representing the first byte in the file. + // + // To create the Pos value for a specific source offset (measured in bytes), + // first add the respective file to the current file set using FileSet.AddFile + // and then call File.Pos(offset) for that file. Given a Pos value p + // for a specific file set fset, the corresponding Position value is + // obtained by calling fset.Position(p). + // + // Pos values can be compared directly with the usual comparison operators: + // If two Pos values p and q are in the same file, comparing p and q is + // equivalent to comparing the respective source file offsets. If p and q + // are in different files, p < q is true if the file implied by p was added + // to the respective file set before the file implied by q. + // + Pos = token.Pos + + // ----------------------------------------------------------------------------- + // Positions + + // Position describes an arbitrary source position + // including the file, line, and column location. + // A Position is valid if the line number is > 0. + // + Position = token.Position + + // ----------------------------------------------------------------------------- + // File + + // A File is a handle for a file belonging to a FileSet. + // A File has a name, size, and line offset table. + // + File = token.File + + // ----------------------------------------------------------------------------- + // FileSet + + // A FileSet represents a set of source files. + // Methods of file sets are synchronized; multiple goroutines + // may invoke them concurrently. + // + // The byte offsets for each file in a file set are mapped into + // distinct (integer) intervals, one interval [base, base+size] + // per file. Base represents the first byte in the file, and size + // is the corresponding file size. A Pos value is a value in such + // an interval. By determining the interval a Pos value belongs + // to, the file, its file base, and thus the byte offset (position) + // the Pos value is representing can be computed. + // + // When adding a new file, a file base must be provided. That can + // be any integer value that is past the end of any interval of any + // file already in the file set. For convenience, FileSet.Base provides + // such a value, which is simply the end of the Pos interval of the most + // recently added file, plus one. Unless there is a need to extend an + // interval later, using the FileSet.Base should be used as argument + // for FileSet.AddFile. + // + FileSet = token.FileSet +) + +// NewFileSet creates a new file set. 
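+// It returns go/token's NewFileSet unchanged, so positions interoperate with the standard library.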
+func NewFileSet() *FileSet {
+	return token.NewFileSet()
+}

From 1f577fda6da2c389d3fbf5724ccb4b8a2b39cf25 Mon Sep 17 00:00:00 2001
From: dylan
Date: Mon, 9 May 2022 23:45:18 +0800
Subject: [PATCH 2/5] update: add scanner and TODO.md

---
 doc/TODO.md             |  57 ++++
 doc/apispec.md          |  55 +++--
 go.mod                  |   1 -
 go.sum                  |   0
 scanner/error.go        |  38 +++
 scanner/example_test.go |  40 +++
 scanner/is.go           |  21 ++
 scanner/scann.go        | 260 +++++++++++++++
 scanner/scanner.go      | 304 +++++++++++++++++
 scanner/scanner_test.go | 717 ++++++++++++++++++++++++++++++++++++++++
 token/token.go          |  29 +-
 token/token_test.go     |  30 ++
 12 files changed, 1516 insertions(+), 36 deletions(-)
 create mode 100644 doc/TODO.md
 create mode 100644 go.sum
 create mode 100644 scanner/error.go
 create mode 100644 scanner/example_test.go
 create mode 100644 scanner/is.go
 create mode 100644 scanner/scann.go
 create mode 100644 scanner/scanner.go
 create mode 100644 scanner/scanner_test.go
 create mode 100644 token/token_test.go

diff --git a/doc/TODO.md b/doc/TODO.md
new file mode 100644
index 0000000..5ddad17
--- /dev/null
+++ b/doc/TODO.md
@@ -0,0 +1,57 @@
+# 1. Definition of req and resp
+Background: req and resp are currently defined with Go struct rules, but the JSON key comes from the definition in the tag.
+Engineers who do not work with Go may be unfamiliar with this rule, and the key is tedious to write: it must be defined twice, once in the field and once in the tag.
+
+```go
+type User {
+	Id int64 `json:"id"`
+}
+
+type User {
+	id int64
+}
+```
+
+# 2. Definition of resp
+Currently a response can be returned through httpx.Ok. Some services also change the response shape, for example returning **{"code": 0, "data": {}}**,
+but this format cannot be expressed in the API file.
+
+
+# 3. Error returns in the API
+Errors of an API cannot be expressed yet.
+post /user/login(req) returns (resp)
+The error code, error format, error description, and similar information cannot be expressed through the API.
+
+A possible format:
+```api
+post /foo(req) returns (resp, error)
+```
+
+# 4. Should types support interface{} / any
+Some users want to express an arbitrary object with interface{} or any.
+
+This is not supported in zero-api; we recommend a strict, standard representation that spells out the concrete information.
+
+
+# 5. Duplicate handler names
+
+```api
+@handler foo
+get /foo(req)
+
+@handler foo
+post /foo(req)
+
+
+// one
+@handler foo
+get|post /foo(req)
+
+
+@handler foo
+get /foo(req)
+post /foo(req)
+
+```
+This will not be supported; define a separate handler for each route instead.
+
\ No newline at end of file
diff --git a/doc/apispec.md b/doc/apispec.md
index c4f6f34..ae7b386 100644
--- a/doc/apispec.md
+++ b/doc/apispec.md
@@ -70,20 +70,40 @@ zero-api omits semicolons according to the following rules:
 To reflect idiomatic use, code examples in this document elide semicolons using these rules.
 
 ## Identifiers
-Identifiers name program entities such as variables and types. An identifier is a sequence of one or more letters and digits. The first character in an identifier must be a letter.
+Identifiers name entities in a program, and some also serve as value-string variables. An identifier may be composed of any non-empty sequence of characters.
+
+If the first character of an identifier is a letter and the identifier contains no special characters, it may be used to name variables and types.
 
 ```EBNF
-identifier = letter { letter | unicode_digit } .
+identifier = unicode_char { unicode_char } .
 ```
 
 ```EBNF_DEMO
-a
+a // can use as ident
 _x9
 ThisVariableIsExported
 αβ
+
+3s
+/path/:user
 ```
 
-Some identifiers are predeclared.
+Note: ':' is a punctuation character, yet a path such as "/path/:user" must be parsed as one complete identifier.
+Therefore a lone **:** is parsed as punctuation, while a **:** following **/** (as in **/:**) is parsed as part of an identifier.
+
+**/api/user/:info** should be parsed as "/api/user/:info"
+
+**jwt: auth** should be parsed as "jwt" ":" "auth"
+
+## Operators and punctuation
+The following characters serve as punctuation:
+```EBNF
+( )
+[ ]
+{ }
+, ;
+: =
+```
 
 ## String literals
 A string literal represents a string constant obtained from concatenating a sequence of characters. There are two forms: raw string literals and interpreted string literals.
 
@@ -108,32 +128,13 @@ interpreted_string_lit = `"` { unicode_value | byte_value } `"` .
 "\U00110000"         // illegal: invalid Unicode code point
 ```
 
-## zero-api value strings
-zero-api supports a class of special string values, which may begin with a special character, a digit, and so on.
-```EBNF
-value_string_lit = unicode_char { unicode_char } .
-/* a value string is terminated by white space or a punctuation character */
-```
-
-```EBNF_DEMO
-3s
-abc
-/api/user/:info
-```
-
-TODO:
-For
-/api/user/:info(req)
-
-this input must be tokenized into the tokens **/api/user/:info**, **(**, **)** and **req**.
-
 ## Predeclared identifiers
-The following are the predeclared identifiers supported by zero-api
+The following are the predeclared identifiers supported by zero-api; currently zero-api supports only predeclared and user-defined identifiers.
 
 ```EBNF
 Types:
 bool float32 float64 int int8 int16 int32 int64
 string uint uint8 uint16 uint32 uint64
-
 ```
 
 ```EBNF_DEMO
@@ -290,12 +291,12 @@ MapType = "map" "[" Type "]" Type .
 ```
 
 # Service definition
 A service defines the routes of the API. One API file may contain multiple service blocks, but all of them must use the same service name.
 ```EBNF
 ServiceDecl = [ ServiceExtDecl ] ServiceBody .
 
-ServiceBody = "service" value_string_lit "{" { RouteDecl } "}" .
+ServiceBody = "service" identifier "{" { RouteDecl } "}" .
 RouteDecl = [ Doc ";" ] Handler ";" Method Path [ Request ] [ Response ] .
 /* Doc, Request and Response may be omitted; Method and Path are required */
 Doc = "@doc" string_lit .
 Handler = "@handler" identifier .
 Method = "get" | "post" | "put" | "head" | "options" | "delete" | "patch" .
-Path = value_string_lit .
+Path = identifier .
 Request = "(" identifier ")" .
 Response = "returns" "(" identifier ")" .
 ```
@@ -314,7 +315,7 @@ TODO: parentheses around Response
 ## Service extension definition
 Extension information for a service is declared with @server.
 ```EBNF
 ServiceExtDecl = "@server" "(" { ServiceExtElement } ")" .
-ServiceExtElement = identifier ":" ( value_string_lit | string_lit | identifier ) .
+ServiceExtElement = identifier ":" ( string_lit | identifier ) .
 ```
 
 ```EBNF_DEMO
diff --git a/go.mod b/go.mod
index e4a83bf..9cc6e81 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,3 @@
 module github.com/zeromicro/zero-api
 
 go 1.15
-
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..e69de29
diff --git a/scanner/error.go b/scanner/error.go
new file mode 100644
index 0000000..6a558df
--- /dev/null
+++ b/scanner/error.go
@@ -0,0 +1,38 @@
+package scanner
+
+import (
+	"fmt"
+	"go/scanner"
+	"io"
+)
+
+type (
+	Error = scanner.Error
+
+	ErrorList = scanner.ErrorList
+)
+
+func (s *Scanner) error(offs int, msg string) {
+	if s.err != nil {
+		s.err(s.file.Position(s.file.Pos(offs)), msg)
+	}
+	s.ErrorCount++
+}
+
+func (s *Scanner) errorf(offs int, format string, args ...interface{}) {
+	s.error(offs, fmt.Sprintf(format, args...))
+}
+
+// PrintError is a utility function that prints a list of errors to w,
+// one error per line, if the err parameter is an ErrorList. Otherwise
+// it prints the err string.
+//
+func PrintError(w io.Writer, err error) {
+	if list, ok := err.(ErrorList); ok {
+		for _, e := range list {
+			_, _ = fmt.Fprintf(w, "%s\n", e)
+		}
+	} else if err != nil {
+		_, _ = fmt.Fprintf(w, "%s\n", err)
+	}
+}
diff --git a/scanner/example_test.go b/scanner/example_test.go
new file mode 100644
index 0000000..6445736
--- /dev/null
+++ b/scanner/example_test.go
@@ -0,0 +1,40 @@
+package scanner_test
+
+import (
+	"fmt"
+
+	"github.com/zeromicro/zero-api/scanner"
+	"github.com/zeromicro/zero-api/token"
+)
+
+func ExampleScanner_Scan() {
+	// src is the input that we want to tokenize.
+	src := []byte(`post /foo (Foo) returns (Bar)`)
+
+	// Initialize the scanner.
+	var s scanner.Scanner
+	fset := token.NewFileSet()                      // positions are relative to fset
+	file := fset.AddFile("", fset.Base(), len(src)) // register input "file"
+	s.Init(file, src, nil /* no error handler */, scanner.ScanComments)
+
+	// Repeated calls to Scan yield the token sequence found in the input.
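+	// Scanning stops at token.EOF; fset.Position resolves each Pos to a line:column pair.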
+ for { + pos, tok, lit := s.Scan() + if tok == token.EOF { + break + } + fmt.Printf("%s\t%s\t%q\n", fset.Position(pos), tok, lit) + } + + // output: + // 1:1 IDENT "post" + // 1:6 IDENT " /foo" + // 1:11 ( "" + // 1:12 IDENT "Foo" + // 1:15 ) "" + // 1:17 IDENT "returns" + // 1:25 ( "" + // 1:26 IDENT "Bar" + // 1:29 ) "" + // 1:30 ; "\n" +} diff --git a/scanner/is.go b/scanner/is.go new file mode 100644 index 0000000..3c0f091 --- /dev/null +++ b/scanner/is.go @@ -0,0 +1,21 @@ +package scanner + +func lower(ch rune) rune { return ('a' - 'A') | ch } + +func isPunctuation(ch rune) bool { + switch ch { + case '=', '(', ')', '[', ']', '{', '}', ',', ';', ':': + return true + } + return false +} + +func digitVal(ch rune) int { + switch { + case '0' <= ch && ch <= '9': + return int(ch - '0') + case 'a' <= lower(ch) && lower(ch) <= 'f': + return int(lower(ch) - 'a' + 10) + } + return 16 // larger than any legal digit val +} diff --git a/scanner/scann.go b/scanner/scann.go new file mode 100644 index 0000000..fdbec8e --- /dev/null +++ b/scanner/scann.go @@ -0,0 +1,260 @@ +package scanner + +import ( + "bytes" + "fmt" + "path/filepath" + "strconv" + "unicode" +) + +// ---------------------------------------------------------------------------- +// scanIdentifier +func (s *Scanner) scanIdentifier(quo bool, offs int) string { + + for { + ch := s.ch + if quo && ch == ':' { // because isPunctuation contain ':" , wo should parse '/:user' + quo = s.ch == '/' + s.next() + continue + } + + switch ch := s.ch; { + case ch < 0, + isPunctuation(ch), + ch == '\n' || ch == ' ': + goto exit + } + + quo = s.ch == '/' + s.next() + } +exit: + lit := s.src[offs:s.offset] + return string(lit) +} + +// ---------------------------------------------------------------------------- + +var prefix = []byte("line ") + +// scanComment +func (s *Scanner) scanComment() string { + // initial '/' already consumed; s.ch == '/' || s.ch == '*' + offs := s.offset - 1 // position of initial '/' + next := -1 // position immediately following the comment; < 0 means invalid comment + numCR := 0 + + if s.ch == '/' { + //-style comment + // (the final '\n' is not considered part of the comment) + s.next() + for s.ch != '\n' && s.ch >= 0 { + if s.ch == '\r' { + numCR++ + } + s.next() + } + // if we are at '\n', the position following the comment is afterwards + next = s.offset + if s.ch == '\n' { + next++ + } + goto exit + } + + /*-style comment */ + s.next() + for s.ch >= 0 { + ch := s.ch + if ch == '\r' { + numCR++ + } + s.next() + if ch == '*' && s.ch == '/' { + s.next() + next = s.offset + goto exit + } + } + + s.error(offs, "comment not terminated") + +exit: + lit := s.src[offs:s.offset] + + // On Windows, a (//-comment) line may end in "\r\n". + // Remove the final '\r' before analyzing the text for + // line directives (matching the compiler). Remove any + // other '\r' afterwards (matching the pre-existing be- + // havior of the scanner). + if numCR > 0 && len(lit) >= 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' { + lit = lit[:len(lit)-1] + numCR-- + } + + // interpret line directives + // (//line directives must start at the beginning of the current line) + if next >= 0 /* implies valid comment */ && (lit[1] == '*' || offs == s.lineOffset) && bytes.HasPrefix(lit[2:], prefix) { + s.updateLineInfo(next, offs, lit) + } + + if numCR > 0 { + lit = stripCR(lit, lit[1] == '*') + } + + return string(lit) +} + +// updateLineInfo parses the incoming comment text at offset offs +// as a line directive. 
If successful, it updates the line info table +// for the position next per the line directive. +func (s *Scanner) updateLineInfo(next, offs int, text []byte) { + // extract comment text + if text[1] == '*' { + text = text[:len(text)-2] // lop off trailing "*/" + } + text = text[7:] // lop off leading "//line " or "/*line " + offs += 7 + + i, n, ok := trailingDigits(text) + if i == 0 { + return // ignore (not a line directive) + } + // i > 0 + + if !ok { + // text has a suffix :xxx but xxx is not a number + s.error(offs+i, "invalid line number: "+string(text[i:])) + return + } + + var line, col int + i2, n2, ok2 := trailingDigits(text[:i-1]) + if ok2 { + //line filename:line:col + i, i2 = i2, i + line, col = n2, n + if col == 0 { + s.error(offs+i2, "invalid column number: "+string(text[i2:])) + return + } + text = text[:i2-1] // lop off ":col" + } else { + //line filename:line + line = n + } + + if line == 0 { + s.error(offs+i, "invalid line number: "+string(text[i:])) + return + } + + // If we have a column (//line filename:line:col form), + // an empty filename means to use the previous filename. + filename := string(text[:i-1]) // lop off ":line", and trim white space + if filename == "" && ok2 { + filename = s.file.Position(s.file.Pos(offs)).Filename + } else if filename != "" { + // Put a relative filename in the current directory. + // This is for compatibility with earlier releases. + // See issue 26671. + filename = filepath.Clean(filename) + if !filepath.IsAbs(filename) { + filename = filepath.Join(s.dir, filename) + } + } + + s.file.AddLineColumnInfo(next, filename, line, col) +} + +func trailingDigits(text []byte) (int, int, bool) { + i := bytes.LastIndexByte(text, ':') // look from right (Windows filenames may contain ':') + if i < 0 { + return 0, 0, false // no ":" + } + // i >= 0 + n, err := strconv.ParseUint(string(text[i+1:]), 10, 0) + return i + 1, int(n), err == nil +} + +// ---------------------------------------------------------------------------- +// scanString +func (s *Scanner) scanString() string { + // '"' opening already consumed + offs := s.offset - 1 + + for { + ch := s.ch + if ch == '\n' || ch < 0 { + s.error(offs, "string literal not terminated") + break + } + s.next() + if ch == '"' { + break + } + if ch == '\\' { + s.scanEscape('"') + } + } + return string(s.src[offs:s.offset]) +} + +// scanEscape parses an escape sequence where rune is the accepted +// escaped quote. In case of a syntax error, it stops at the offending +// character (without consuming it) and returns false. Otherwise +// it returns true. 
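+// The accepted escapes mirror Go's: \a \b \f \n \r \t \v \\, the quote itself, octal \nnn, hex \xhh, and Unicode \uhhhh or \Uhhhhhhhh.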
+func (s *Scanner) scanEscape(quote rune) bool { + offs := s.offset + + var n int + var base, max uint32 + switch s.ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: + s.next() + return true + case '0', '1', '2', '3', '4', '5', '6', '7': + n, base, max = 3, 8, 255 + case 'x': + s.next() + n, base, max = 2, 16, 255 + case 'u': + s.next() + n, base, max = 4, 16, unicode.MaxRune + case 'U': + s.next() + n, base, max = 8, 16, unicode.MaxRune + default: + msg := "unknown escape sequence" + if s.ch < 0 { + msg = "escape sequence not terminated" + } + s.error(offs, msg) + return false + } + + var x uint32 + for n > 0 { + d := uint32(digitVal(s.ch)) + if d >= base { + msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch) + if s.ch < 0 { + msg = "escape sequence not terminated" + } + s.error(s.offset, msg) + return false + } + x = x*base + d + s.next() + n-- + } + + if x > max || 0xD800 <= x && x < 0xE000 { + s.error(offs, "escape sequence is invalid Unicode code point") + return false + } + + return true +} diff --git a/scanner/scanner.go b/scanner/scanner.go new file mode 100644 index 0000000..6198307 --- /dev/null +++ b/scanner/scanner.go @@ -0,0 +1,304 @@ +package scanner + +import ( + "fmt" + "path/filepath" + "unicode/utf8" + + "github.com/zeromicro/zero-api/token" +) + +const ( + ScanComments Mode = 1 << iota // return comments as COMMENT tokens + dontInsertSemis // do not automatically insert semicolons - for testing only +) + +type ( + // An ErrorHandler may be provided to Scanner.Init. If a syntax error is + // encountered and a handler was installed, the handler is called with a + // position and an error message. The position points to the beginning of + // the offending token. + // + ErrorHandler func(pos token.Position, msg string) + + // A Scanner holds the scanner's internal state while processing + // a given text. It can be allocated as part of another data + // structure but must be initialized via Init before use. + // + Scanner struct { + // immutable state + file *token.File // source file handle + dir string // directory portion of file.Name() + src []byte // source + err ErrorHandler // error reporting; or nil + mode Mode // scanning mode + + // scanning state + ch rune // current character + offset int // character offset + rdOffset int // reading offset (position after current character) + lineOffset int // current line offset + insertSemi bool // insert a semicolon before next newline + + // public state - ok to modify + ErrorCount int // number of errors encountered + } + + // Mode A mode value is a set of flags (or 0). + // They control scanner behavior. 
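+	// Only ScanComments is part of the public API; dontInsertSemis exists for testing.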
+ // + Mode uint +) + +func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) { + if file.Size() != len(src) { + panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src))) + } + s.file = file + s.dir, _ = filepath.Split(file.Name()) + s.src = src + s.err = err + s.mode = mode + + s.ch = ' ' + s.offset = 0 + s.rdOffset = 0 + s.lineOffset = 0 + s.insertSemi = false + s.ErrorCount = 0 + + s.next() + if s.ch == bom { + s.next() // ignore BOM at file beginning + } +} + +const ( + bom = 0xFEFF // byte order mark, only permitted as very first character + eof = -1 // end of file +) + +func (s *Scanner) next() { + if s.rdOffset < len(s.src) { + s.offset = s.rdOffset + if s.ch == '\n' { + s.lineOffset = s.offset + s.file.AddLine(s.offset) + } + r, w := rune(s.src[s.rdOffset]), 1 + switch { + case r == 0: + s.error(s.offset, "illegal character NUL") + case r >= utf8.RuneSelf: + // not ASCII + r, w = utf8.DecodeRune(s.src[s.rdOffset:]) + if r == utf8.RuneError && w == 1 { + s.error(s.offset, "illegal UTF-8 encoding") + } else if r == bom && s.offset > 0 { + s.error(s.offset, "illegal byte order mark") + } + } + s.rdOffset += w + s.ch = r + } else { + s.offset = len(s.src) + if s.ch == '\n' { + s.lineOffset = s.offset + s.file.AddLine(s.offset) + } + s.ch = eof + } +} + +func stripCR(b []byte, comment bool) []byte { + c := make([]byte, len(b)) + i := 0 + for j, ch := range b { + // In a /*-style comment, don't strip \r from *\r/ (incl. + // sequences of \r from *\r\r...\r/) since the resulting + // */ would terminate the comment too early unless the \r + // is immediately following the opening /* in which case + // it's ok because /*/ is not closed yet (issue #11151). + if ch != '\r' || comment && i > len("/*") && c[i-1] == '*' && j+1 < len(b) && b[j+1] == '/' { + c[i] = ch + i++ + } + } + return c[:i] +} + +func (s *Scanner) skipWhitespace() { + for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' { + s.next() + } +} + +// Scan scans the next token and returns the token position, the token, +// and its literal string if applicable. The source end is indicated by +// token.EOF. 
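+// Semicolons are inserted automatically following the rules in doc/apispec.md.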
+// +func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { +scanAgain: + s.skipWhitespace() + + pos = s.file.Pos(s.offset) + + if s.ch == -1 { + if s.insertSemi { + s.insertSemi = false + return pos, token.SEMICOLON, "\n" + } + tok = token.EOF + return + } + ch := s.ch + offs := s.offset + s.next() + + insertSemi := false + switch ch { + case '/': + switch s.ch { + case '/', '*': + // comment + if s.insertSemi && s.findLineEnd() { + // reset position to the beginning of the comment + s.ch = '/' + s.offset = s.file.Offset(pos) + s.rdOffset = s.offset + 1 + s.insertSemi = false // newline consumed + return pos, token.SEMICOLON, "\n" + } + comment := s.scanComment() + if s.mode&ScanComments == 0 { + // skip comment + s.insertSemi = false // newline consumed + goto scanAgain + } + tok = token.COMMENT + lit = comment + default: + lit = s.scanIdentifier(true, offs) + tok = token.IDENT + insertSemi = s.insertSemi + } + case '\n': + s.insertSemi = false + return pos, token.SEMICOLON, "\n" + case '"': + insertSemi = true + tok = token.STRING + lit = s.scanString() + case '`': + insertSemi = true + tok = token.STRING + lit = s.scanRawString() + case '=': + tok = token.ASSIGN + case ':': + tok = token.COLON + case ',': + tok = token.COMMA + case ';': + tok = token.SEMICOLON + lit = ";" + case '(': + tok = token.LPAREN + case ')': + insertSemi = true + tok = token.RPAREN + case '[': + tok = token.LBRACK + case ']': + insertSemi = true + tok = token.RBRACK + case '{': + tok = token.LBRACE + case '}': + insertSemi = true + tok = token.RBRACE + case '~': + tok = token.TILDE + default: + lit = s.scanIdentifier(false, offs) + tok = token.IDENT + insertSemi = s.insertSemi + } + + if s.mode&dontInsertSemis == 0 { + s.insertSemi = insertSemi + } + return +} + +func (s *Scanner) findLineEnd() bool { + // initial '/' already consumed + + defer func(offs int) { + // reset scanner state to where it was upon calling findLineEnd + s.ch = '/' + s.offset = offs + s.rdOffset = offs + 1 + s.next() // consume initial '/' again + }(s.offset - 1) + + // read ahead until a newline, EOF, or non-comment token is found + for s.ch == '/' || s.ch == '*' { + if s.ch == '/' { + //-style comment always contains a newline + return true + } + /*-style comment: look for newline */ + s.next() + for s.ch >= 0 { + ch := s.ch + if ch == '\n' { + return true + } + s.next() + if ch == '*' && s.ch == '/' { + s.next() + break + } + } + s.skipWhitespace() // s.insertSemi is set + if s.ch < 0 || s.ch == '\n' { + return true + } + if s.ch != '/' { + // non-comment token + return false + } + s.next() // consume '/' + } + + return false +} + +func (s *Scanner) scanRawString() string { + // '`' opening already consumed + offs := s.offset - 1 + + hasCR := false + for { + ch := s.ch + if ch < 0 { + s.error(offs, "raw string literal not terminated") + break + } + s.next() + if ch == '`' { + break + } + if ch == '\r' { + hasCR = true + } + } + + lit := s.src[offs:s.offset] + if hasCR { + lit = stripCR(lit, false) + } + + return string(lit) +} diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go new file mode 100644 index 0000000..967a351 --- /dev/null +++ b/scanner/scanner_test.go @@ -0,0 +1,717 @@ +package scanner + +import ( + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/zeromicro/zero-api/token" +) + +var fset = token.NewFileSet() + +const /* class */ ( + special = iota + literal + operator +) + +func tokenclass(tok token.Token) int { + switch { + case tok.IsLiteral(): + return literal + case 
tok.IsOperator(): + return operator + } + return special +} + +type elt struct { + tok token.Token + lit string + class int +} + +var tokens = []elt{ + // Special tokens + {token.COMMENT, "/* a comment */", special}, + {token.COMMENT, "// a comment \n", special}, + {token.COMMENT, "/*\r*/", special}, + {token.COMMENT, "/**\r/*/", special}, // issue 11151 + {token.COMMENT, "/**\r\r/*/", special}, + {token.COMMENT, "//\r\n", special}, + + // Identifiers and basic type literals + {token.IDENT, "foobar", literal}, + {token.IDENT, "a۰۱۸", literal}, + {token.IDENT, "foo६४", literal}, + {token.IDENT, "bar9876", literal}, + {token.IDENT, "ŝ", literal}, // was bug (issue 4000) + {token.IDENT, "ŝfoo", literal}, // was bug (issue 4000) + {token.IDENT, "您好", literal}, + + {token.STRING, "`foobar`", literal}, + {token.STRING, "`" + `foo + bar` + + "`", + literal, + }, + {token.STRING, "`\r`", literal}, + {token.STRING, "`foo\r\nbar`", literal}, + + {token.IDENT, "3s", literal}, + {token.IDENT, "/path/:user", literal}, + + {token.LPAREN, "(", operator}, + {token.LBRACK, "[", operator}, + {token.LBRACE, "{", operator}, + {token.COMMA, ",", operator}, + + {token.RPAREN, ")", operator}, + {token.RBRACK, "]", operator}, + {token.RBRACE, "}", operator}, + {token.SEMICOLON, ";", operator}, + {token.COLON, ":", operator}, + {token.TILDE, "~", operator}, +} + +const whitespace = " \t \n\n\n" // to separate tokens + +var source = func() []byte { + var src []byte + for _, t := range tokens { + src = append(src, t.lit...) + src = append(src, whitespace...) + } + return src +}() + +func newlineCount(s string) int { + n := 0 + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + n++ + } + } + return n +} + +func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) { + pos := fset.Position(p) + // Check cleaned filenames so that we don't have to worry about + // different os.PathSeparator values. + if pos.Filename != expected.Filename && filepath.Clean(pos.Filename) != filepath.Clean(expected.Filename) { + t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename) + } + if pos.Offset != expected.Offset { + t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset) + } + if pos.Line != expected.Line { + t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line) + } + if pos.Column != expected.Column { + t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column) + } +} + +// Verify that calling Scan() provides the correct results. 
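+// It scans the assembled source and checks each token's kind, class, literal, and position.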
+func TestScan(t *testing.T) { + whitespace_linecount := newlineCount(whitespace) + + // error handler + eh := func(_ token.Position, msg string) { + t.Errorf("error handler called (msg = %s)", msg) + } + + // verify scan + var s Scanner + s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertSemis) + + // set up expected position + epos := token.Position{ + Filename: "", + Offset: 0, + Line: 1, + Column: 1, + } + + index := 0 + for { + pos, tok, lit := s.Scan() + + // check position + if tok == token.EOF { + // correction for EOF + epos.Line = newlineCount(string(source)) + epos.Column = 2 + } + checkPos(t, lit, pos, epos) + + // check token + e := elt{token.EOF, "", special} + if index < len(tokens) { + e = tokens[index] + index++ + } + if tok != e.tok { + t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok) + } + + // check token class + if tokenclass(tok) != e.class { + t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) + } + + // check literal + elit := "" + switch e.tok { + case token.COMMENT: + // no CRs in comments + elit = string(stripCR([]byte(e.lit), e.lit[1] == '*')) + //-style comment literal doesn't contain newline + if elit[1] == '/' { + elit = elit[0 : len(elit)-1] + } + case token.IDENT: + elit = e.lit + case token.SEMICOLON: + elit = ";" + default: + if e.tok.IsLiteral() { + // no CRs in raw string literals + elit = e.lit + if elit[0] == '`' { + elit = string(stripCR([]byte(elit), false)) + } + } + } + if lit != elit { + t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit) + } + + if tok == token.EOF { + break + } + + // update position + epos.Offset += len(e.lit) + len(whitespace) + epos.Line += newlineCount(e.lit) + whitespace_linecount + + } + + if s.ErrorCount != 0 { + t.Errorf("found %d errors", s.ErrorCount) + } +} + +func TestStripCR(t *testing.T) { + for _, test := range []struct{ have, want string }{ + {"//\n", "//\n"}, + {"//\r\n", "//\n"}, + {"//\r\r\r\n", "//\n"}, + {"//\r*\r/\r\n", "//*/\n"}, + {"/**/", "/**/"}, + {"/*\r/*/", "/*/*/"}, + {"/*\r*/", "/**/"}, + {"/**\r/*/", "/**\r/*/"}, + {"/*\r/\r*\r/*/", "/*/*\r/*/"}, + {"/*\r\r\r\r*/", "/**/"}, + } { + got := string(stripCR([]byte(test.have), len(test.have) >= 2 && test.have[1] == '*')) + if got != test.want { + t.Errorf("stripCR(%q) = %q; want %q", test.have, got, test.want) + } + } +} + +func checkSemi(t *testing.T, line string, mode Mode) { + var S Scanner + file := fset.AddFile("TestSemis", fset.Base(), len(line)) + S.Init(file, []byte(line), nil, mode) + pos, tok, lit := S.Scan() + for tok != token.EOF { + if tok == token.ILLEGAL { + // the illegal token literal indicates what + // kind of semicolon literal to expect + semiLit := "\n" + if lit[0] == '#' { + semiLit = ";" + } + // next token must be a semicolon + semiPos := file.Position(pos) + semiPos.Offset++ + semiPos.Column++ + pos, tok, lit = S.Scan() + if tok == token.SEMICOLON { + if lit != semiLit { + t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit) + } + checkPos(t, line, pos, semiPos) + } else { + t.Errorf("bad token for %q: got %s, expected ;", line, tok) + } + } else if tok == token.SEMICOLON { + t.Errorf("bad token for %q: got ;, expected no ;", line) + } + pos, tok, lit = S.Scan() + } +} + +var lines = []string{ + // # indicates a semicolon present in the source + // $ indicates an automatically inserted semicolon + "", + "\ufeff#;", // first BOM is ignored + "#;", + "foo$\n", + "123$\n", + "1.2$\n", + "'x'$\n", + `"x"` + "$\n", 
+ "`x`$\n", + + "+\n", + "-\n", + "*\n", + "/\n", + "%\n", + + "&\n", + "|\n", + "^\n", + "<<\n", + ">>\n", + "&^\n", + + "+=\n", + "-=\n", + "*=\n", + "/=\n", + "%=\n", + + "&=\n", + "|=\n", + "^=\n", + "<<=\n", + ">>=\n", + "&^=\n", + + "&&\n", + "||\n", + "<-\n", + "++$\n", + "--$\n", + + "==\n", + "<\n", + ">\n", + "=\n", + "!\n", + + "!=\n", + "<=\n", + ">=\n", + ":=\n", + "...\n", + + "(\n", + "[\n", + "{\n", + ",\n", + ".\n", + + ")$\n", + "]$\n", + "}$\n", + "#;\n", + ":\n", + + "break$\n", + "case\n", + "chan\n", + "const\n", + "continue$\n", + + "default\n", + "defer\n", + "else\n", + "fallthrough$\n", + "for\n", + + "func\n", + "go\n", + "goto\n", + "if\n", + "import\n", + + "interface\n", + "map\n", + "package\n", + "range\n", + "return$\n", + + "select\n", + "struct\n", + "switch\n", + "type\n", + "var\n", + + "foo$//comment\n", + "foo$//comment", + "foo$/*comment*/\n", + "foo$/*\n*/", + "foo$/*comment*/ \n", + "foo$/*\n*/ ", + + "foo $// comment\n", + "foo $// comment", + "foo $/*comment*/\n", + "foo $/*\n*/", + "foo $/* */ /* \n */ bar$/**/\n", + "foo $/*0*/ /*1*/ /*2*/\n", + + "foo $/*comment*/ \n", + "foo $/*0*/ /*1*/ /*2*/ \n", + "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n", + "foo $/* an EOF terminates a line */", + "foo $/* an EOF terminates a line */ /*", + "foo $/* an EOF terminates a line */ //", + + "package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n", + "package main$", +} + +func TestSemis(t *testing.T) { + // TODO: add semis check? + //for _, line := range lines { + // checkSemi(t, line, 0) + // checkSemi(t, line, ScanComments) + // + // // if the input ended in newlines, the input must tokenize the + // // same with or without those newlines + // for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { + // checkSemi(t, line[0:i], 0) + // checkSemi(t, line[0:i], ScanComments) + // } + //} +} + +type segment struct { + srcline string // a line of source text + filename string // filename for current token; error message for invalid line directives + line, column int // line and column for current token; error position for invalid line directives +} + +var segments = []segment{ + // exactly one token per line since the test consumes one token per segment + {" line1", "TestLineDirectives", 1, 3}, + {"\nline2", "TestLineDirectives", 2, 1}, + {"\nline3 //line File1.go:100", "TestLineDirectives", 3, 1}, // bad line comment, ignored + {"\nline4", "TestLineDirectives", 4, 1}, + {"\n//line File1.go:100\n line100", "File1.go", 100, 0}, + {"\n//line \t :42\n line1", " \t ", 42, 0}, + {"\n//line File2.go:200\n line200", "File2.go", 200, 0}, + {"\n//line foo\t:42\n line42", "foo\t", 42, 0}, + {"\n //line foo:42\n line43", "foo\t", 44, 0}, // bad line comment, ignored (use existing, prior filename) + {"\n//line foo 42\n line44", "foo\t", 46, 0}, // bad line comment, ignored (use existing, prior filename) + {"\n//line /bar:42\n line45", "/bar", 42, 0}, + {"\n//line ./foo:42\n line46", "foo", 42, 0}, + {"\n//line a/b/c/File1.go:100\n line100", "a/b/c/File1.go", 100, 0}, + {"\n//line c:\\bar:42\n line200", "c:\\bar", 42, 0}, + {"\n//line c:\\dir\\File1.go:100\n line201", "c:\\dir\\File1.go", 100, 0}, + + // tests for new line directive syntax + {"\n//line :100\na1", "", 100, 0}, // missing filename means empty filename + {"\n//line bar:100\nb1", "bar", 100, 0}, + {"\n//line :100:10\nc1", "bar", 100, 10}, // missing filename means current filename + {"\n//line foo:100:10\nd1", "foo", 100, 10}, + + {"\n/*line :100*/a2", "", 100, 0}, // missing filename 
means empty filename + {"\n/*line bar:100*/b2", "bar", 100, 0}, + {"\n/*line :100:10*/c2", "bar", 100, 10}, // missing filename means current filename + {"\n/*line foo:100:10*/d2", "foo", 100, 10}, + {"\n/*line foo:100:10*/ e2", "foo", 100, 14}, // line-directive relative column + {"\n/*line foo:100:10*/\n\nf2", "foo", 102, 1}, // absolute column since on new line +} + +var dirsegments = []segment{ + // exactly one token per line since the test consumes one token per segment + {" line1", "TestLineDir/TestLineDirectives", 1, 3}, + {"\n//line File1.go:100\n line100", "TestLineDir/File1.go", 100, 0}, +} + +var dirUnixSegments = []segment{ + {"\n//line /bar:42\n line42", "/bar", 42, 0}, +} + +var dirWindowsSegments = []segment{ + {"\n//line c:\\bar:42\n line42", "c:\\bar", 42, 0}, +} + +// Verify that line directives are interpreted correctly. +func TestLineDirectives(t *testing.T) { + testSegments(t, segments, "TestLineDirectives") + testSegments(t, dirsegments, "TestLineDir/TestLineDirectives") + if runtime.GOOS == "windows" { + testSegments(t, dirWindowsSegments, "TestLineDir/TestLineDirectives") + } else { + testSegments(t, dirUnixSegments, "TestLineDir/TestLineDirectives") + } +} + +func testSegments(t *testing.T, segments []segment, filename string) { + var src string + for _, e := range segments { + src += e.srcline + } + + // verify scan + var S Scanner + file := fset.AddFile(filename, fset.Base(), len(src)) + S.Init(file, []byte(src), func(pos token.Position, msg string) { t.Error(Error{pos, msg}) }, dontInsertSemis) + for _, s := range segments { + p, _, lit := S.Scan() + pos := file.Position(p) + checkPos(t, lit, p, token.Position{ + Filename: s.filename, + Offset: pos.Offset, + Line: s.line, + Column: s.column, + }) + } + + if S.ErrorCount != 0 { + t.Errorf("got %d errors", S.ErrorCount) + } +} + +// The filename is used for the error message in these test cases. +// The first line directive is valid and used to control the expected error line. +var invalidSegments = []segment{ + {"\n//line :1:1\n//line foo:42 extra text\ndummy", "invalid line number: 42 extra text", 1, 12}, + {"\n//line :2:1\n//line foobar:\ndummy", "invalid line number: ", 2, 15}, + {"\n//line :5:1\n//line :0\ndummy", "invalid line number: 0", 5, 9}, + {"\n//line :10:1\n//line :1:0\ndummy", "invalid column number: 0", 10, 11}, + {"\n//line :1:1\n//line :foo:0\ndummy", "invalid line number: 0", 1, 13}, // foo is considered part of the filename +} + +// Verify that invalid line directives get the correct error message. +func TestInvalidLineDirectives(t *testing.T) { + // make source + var src string + for _, e := range invalidSegments { + src += e.srcline + } + + // verify scan + var S Scanner + var s segment // current segment + file := fset.AddFile(filepath.Join("dir", "TestInvalidLineDirectives"), fset.Base(), len(src)) + S.Init(file, []byte(src), func(pos token.Position, msg string) { + if msg != s.filename { + t.Errorf("got error %q; want %q", msg, s.filename) + } + if pos.Line != s.line || pos.Column != s.column { + t.Errorf("got position %d:%d; want %d:%d", pos.Line, pos.Column, s.line, s.column) + } + }, dontInsertSemis) + for _, s = range invalidSegments { + S.Scan() + } + + if S.ErrorCount != len(invalidSegments) { + t.Errorf("got %d errors; want %d", S.ErrorCount, len(invalidSegments)) + } +} + +// Verify that initializing the same scanner more than once works correctly. 
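+// Init resets all scanner state, so a single Scanner can be reused across files.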
+func TestInit(t *testing.T) { + var s Scanner + + // 1st init + src1 := "if true { }" + f1 := fset.AddFile("src1", fset.Base(), len(src1)) + s.Init(f1, []byte(src1), nil, dontInsertSemis) + if f1.Size() != len(src1) { + t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) + } + s.Scan() // if + s.Scan() // true + _, tok, _ := s.Scan() // { + if tok != token.LBRACE { + t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE) + } + + // 2nd init + src2 := "go true { ]" + f2 := fset.AddFile("src2", fset.Base(), len(src2)) + s.Init(f2, []byte(src2), nil, dontInsertSemis) + if f2.Size() != len(src2) { + t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) + } + _, tok, str := s.Scan() // go + if tok != token.IDENT && str == "go" { + t.Errorf("bad token: got %s, expected %s", tok, token.IDENT) + } + + if s.ErrorCount != 0 { + t.Errorf("found %d errors", s.ErrorCount) + } +} + +func TestStdErrorHander(t *testing.T) { + const src = "@\n" + // illegal character, cause an error + "@ @\n" + // two errors on the same line + "//line File2:20\n" + + "@\n" + // different file, but same line + "//line File2:1\n" + + "@ @\n" + // same file, decreasing line number + "//line File1:1\n" + + "@ @ @" // original file, line 1 again + + var list ErrorList + eh := func(pos token.Position, msg string) { list.Add(pos, msg) } + + var s Scanner + s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), eh, dontInsertSemis) + for { + if _, tok, _ := s.Scan(); tok == token.EOF { + break + } + } + + if len(list) != s.ErrorCount { + t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount) + } + + if len(list) != 0 { + t.Errorf("found %d raw errors, expected 9", len(list)) + PrintError(os.Stderr, list) + } +} + +type errorCollector struct { + cnt int // number of errors encountered + msg string // last error message encountered + pos token.Position // last error position encountered +} + +func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) { + var s Scanner + var h errorCollector + eh := func(pos token.Position, msg string) { + h.cnt++ + h.msg = msg + h.pos = pos + } + s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis) + _, tok0, lit0 := s.Scan() + if tok0 != tok { + t.Errorf("%q: got %s, expected %s", src, tok0, tok) + } + if tok0 != token.ILLEGAL && lit0 != lit { + t.Errorf("%q: got literal %q, expected %q", src, lit0, lit) + } + cnt := 0 + if err != "" { + cnt = 1 + } + if h.cnt != cnt { + t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt) + } + if h.msg != err { + t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) + } + if h.pos.Offset != pos { + t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos) + } +} + +var errors = []struct { + src string + tok token.Token + pos int + lit string + err string +}{ + {"\a", token.IDENT, 0, "\a", ""}, + {`#`, token.IDENT, 0, "#", ""}, + //{`…`, token.VALUE_STRING, 0, "…", ""}, + + {`""`, token.STRING, 0, `""`, ""}, + {`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"}, + {"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"}, + {"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"}, + {"``", token.STRING, 0, "``", ""}, + {"`", token.STRING, 0, "`", "raw string literal not terminated"}, + {"/**/", token.COMMENT, 0, "/**/", ""}, + {"/*", token.COMMENT, 0, "/*", "comment not terminated"}, + + {"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"}, + 
{"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"}, + {"\ufeff\ufeff", token.IDENT, 3, "\ufeff", "illegal byte order mark"}, // only first BOM is ignored + {"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"}, // only first BOM is ignored + + {`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored + {"abc\x00def", token.IDENT, 3, "abc\u0000def", "illegal character NUL"}, + {"abc\x00", token.IDENT, 3, "abc\u0000", "illegal character NUL"}, +} + +func TestScanErrors(t *testing.T) { + for _, e := range errors { + checkError(t, e.src, e.tok, e.pos, e.lit, e.err) + } +} + +func BenchmarkScan(b *testing.B) { + b.StopTimer() + fset := token.NewFileSet() + file := fset.AddFile("", fset.Base(), len(source)) + var s Scanner + b.StartTimer() + for i := 0; i < b.N; i++ { + s.Init(file, source, nil, ScanComments) + for { + _, tok, _ := s.Scan() + if tok == token.EOF { + break + } + } + } +} + +func BenchmarkScanFiles(b *testing.B) { + // Scan a few arbitrary large files, and one small one, to provide some + // variety in benchmarks. + // TODO: change *.go to *.api + for _, p := range []string{ + "go/types/expr.go", + "go/parser/parser.go", + "net/http/server.go", + "go/scanner/errors.go", + } { + b.Run(p, func(b *testing.B) { + b.StopTimer() + filename := filepath.Join("..", "..", filepath.FromSlash(p)) + src, err := os.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + fset := token.NewFileSet() + file := fset.AddFile(filename, fset.Base(), len(src)) + b.SetBytes(int64(len(src))) + var s Scanner + b.StartTimer() + for i := 0; i < b.N; i++ { + s.Init(file, src, nil, ScanComments) + for { + _, tok, _ := s.Scan() + if tok == token.EOF { + break + } + } + } + }) + } +} diff --git a/token/token.go b/token/token.go index ad9955d..9e311bb 100644 --- a/token/token.go +++ b/token/token.go @@ -3,6 +3,7 @@ package token import ( "go/token" "strconv" + "unicode" ) // Token is the set of lexical tokens of the zero-api programming language. @@ -20,15 +21,14 @@ const ( IDENT STRING - VALUE_STRING literal_end operator_beg + ASSIGN // = LPAREN // ( LBRACK // [ LBRACE // { COMMA // , - PERIOD // . RPAREN // ) RBRACK // ] @@ -47,17 +47,17 @@ const ( var tokens = [...]string{ ILLEGAL: "ILLEGAL", - EOF: "EOF", - COMMENT: "COMMENT", - IDENT: "IDENT", - STRING: "STRING", - VALUE_STRING: "VALUE_STRING", + EOF: "EOF", + COMMENT: "COMMENT", + IDENT: "IDENT", + STRING: "STRING", + ASSIGN: "=", LPAREN: "(", LBRACK: "[", LBRACE: "{", COMMA: ",", - PERIOD: ".", + //PERIOD: ".", RPAREN: ")", RBRACK: "]", @@ -98,3 +98,16 @@ func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_en func (tok Token) IsOperator() bool { return (operator_beg < tok && tok < operator_end) || tok == TILDE } + +// IsIdentifier reports whether name is a Go identifier, that is, a non-empty +// string made up of letters, digits, and underscores, where the first character +// is not a digit. Keywords are not identifiers. 
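+// Note that zero-api currently declares no keywords, so no keyword check is performed here.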
+// +func IsIdentifier(name string) bool { + for i, c := range name { + if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) { + return false + } + } + return name != "" +} diff --git a/token/token_test.go b/token/token_test.go new file mode 100644 index 0000000..a578e67 --- /dev/null +++ b/token/token_test.go @@ -0,0 +1,30 @@ +package token + +import "testing" + +func TestIsIdentifier(t *testing.T) { + tests := []struct { + name string + in string + want bool + }{ + {"Empty", "", false}, + {"Space", " ", false}, + {"SpaceSuffix", "foo ", false}, + {"Number", "123", false}, + {"Keyword", "func", false}, + + {"LettersASCII", "foo", true}, + {"MixedASCII", "_bar123", true}, + {"UppercaseKeyword", "Func", true}, + {"LettersUnicode", "fóö", true}, + {"Slash", "/path/user", false}, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got := IsIdentifier(test.in); got != test.want { + t.Fatalf("IsIdentifier(%q) = %t, want %v", test.in, got, test.want) + } + }) + } +} From 26375ba34a0aa492da0773ee87c14ccbf32b9c94 Mon Sep 17 00:00:00 2001 From: dylan Date: Mon, 16 May 2022 23:41:09 +0800 Subject: [PATCH 3/5] add ast parser --- ast/ast.go | 394 ++++++++++++++++++++++++++++++ doc/TODO.md | 3 + doc/apispec.md | 4 +- parser/error.go | 42 ++++ parser/example_test.go | 25 ++ parser/interface.go | 65 +++++ parser/parse.go | 523 ++++++++++++++++++++++++++++++++++++++++ parser/parser.go | 214 ++++++++++++++++ parser/parser_test.go | 20 ++ parser/trace.go | 29 +++ scanner/example_test.go | 2 +- scanner/scanner.go | 6 +- scanner/scanner_test.go | 8 +- testdata/demo.api | 43 ++++ token/token.go | 4 + token/token_test.go | 2 +- 16 files changed, 1372 insertions(+), 12 deletions(-) create mode 100644 ast/ast.go create mode 100644 parser/error.go create mode 100644 parser/example_test.go create mode 100644 parser/interface.go create mode 100644 parser/parse.go create mode 100644 parser/parser.go create mode 100644 parser/parser_test.go create mode 100644 parser/trace.go create mode 100644 testdata/demo.api diff --git a/ast/ast.go b/ast/ast.go new file mode 100644 index 0000000..38e04eb --- /dev/null +++ b/ast/ast.go @@ -0,0 +1,394 @@ +package ast + +import ( + "go/ast" + + "github.com/zeromicro/zero-api/token" +) + +type ( + Node = ast.Node + + Expr interface { + Node + exprNode() + } + + Decl interface { + Node + declNode() + } +) + +// ---------------------------------------------------------------------------- +// comment + +type ( + Comment = ast.Comment + + CommentGroup = ast.CommentGroup +) + +// ---------------------------------------------------------------------------- +// Expressions and types + +type ( + BadExpr struct { + From, To token.Pos + } + + Ident struct { + NamePos token.Pos + Name string + } + + // BasicLit node represents a literal of basic type. + BasicLit struct { + ValuePos token.Pos // literal position + Kind token.Token // token.STRING or token.Ident + Value string // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 2.4i, 'a', '\x7f', "foo" or `\m\n\o` + } + + // A StarExpr node represents an expression of the form "*" Expression. + // Semantically it could be a unary "*" expression, or a pointer type. + // + StarExpr struct { + Star token.Pos // position of "*" + X Expr // operand + } + + KeyValueExpr struct { + Key *Ident + Colon token.Pos // position of ":" + Value *BasicLit // *BasicLit for info or *Ident for server + } + + // A ParenExpr node represents a parenthesized expression. 
+ // like (req) or (resp) + ParenExpr struct { + Lparen token.Pos // position of "(" + X Expr // parenthesized expression + Rparen token.Pos // position of ")" + } +) + +func (x *BadExpr) Pos() token.Pos { return x.From } +func (x *BadExpr) End() token.Pos { return x.To } +func (x *BadExpr) exprNode() {} + +func (x *Ident) Pos() token.Pos { return x.NamePos } +func (x *Ident) End() token.Pos { return token.Pos(int(x.NamePos) + len(x.Name)) } +func (x *Ident) exprNode() {} + +func (x *BasicLit) Pos() token.Pos { return x.ValuePos } +func (x *BasicLit) End() token.Pos { return token.Pos(int(x.ValuePos) + len(x.Value)) } +func (x *BasicLit) exprNode() {} + +func (x *StarExpr) Pos() token.Pos { return x.Star } +func (x *StarExpr) End() token.Pos { return x.X.End() } +func (x *StarExpr) exprNode() {} + +func (x *KeyValueExpr) Pos() token.Pos { return x.Key.Pos() } +func (x *KeyValueExpr) End() token.Pos { return x.Value.End() } +func (x *KeyValueExpr) exprNode() {} + +func (x *ParenExpr) Pos() token.Pos { return x.Lparen } +func (x *ParenExpr) End() token.Pos { return x.Rparen } +func (x *ParenExpr) exprNode() {} + +type ( + // A Field represents a Field declaration list in a struct type, + // a method list in an interface type, or a parameter/result declaration + // in a signature. + // Field.Names is nil for unnamed parameters (parameter lists which only contain types) + // and embedded struct fields. In the latter case, the field name is the type name. + // Field.Names contains a single name "type" for elements of interface type lists. + // Types belonging to the same type list share the same "type" identifier which also + // records the position of that keyword. + // + Field struct { + Doc *CommentGroup // associated documentation; or nil + Names []*Ident // field/method/(type) parameter names, or type "type"; or nil + Type Expr // field/method/parameter type, type list type; or nil + Tag *BasicLit // field tag; or nil + Comment *CommentGroup // line comments; or nil + } + + FieldList struct { + Lbrace token.Pos + List []*Field + Rbrace token.Pos + } + + // An ArrayType node represents an array or slice type. + ArrayType struct { + Lbrack token.Pos // position of "[" + //Len Expr // Ellipsis node for [...]T array types, nil for slice types + Elt Expr // element type + } + + StructType struct { + Struct token.Pos + Fields *FieldList + //Incomplete bool + } + + // A MapType node represents a map type. + MapType struct { + Map token.Pos // position of "map" keyword + Key Expr + Value Expr + } +) + +func (f *FieldList) Pos() token.Pos { + if f.Lbrace.IsValid() { + return f.Lbrace + } + // the list should not be empty in this case; + // be conservative and guard against bad ASTs + if len(f.List) > 0 { + return f.List[0].Pos() + } + return token.NoPos +} + +func (f *FieldList) End() token.Pos { + if f.Rbrace.IsValid() { + return f.Rbrace + 1 + } + // the list should not be empty in this case; + // be conservative and guard against bad ASTs + if n := len(f.List); n > 0 { + return f.List[n-1].End() + } + return token.NoPos +} + +func (f *Field) Pos() token.Pos { + if len(f.Names) > 0 { + return f.Names[0].Pos() + } + if f.Type != nil { + return f.Type.Pos() + } + return token.NoPos +} + +func (f *Field) End() token.Pos { + if f.Tag != nil { + return f.Tag.End() + } + if f.Type != nil { + return f.Type.End() + } + if len(f.Names) > 0 { + return f.Names[len(f.Names)-1].End() + } + return token.NoPos +} + +// NumFields returns the number of parameters or struct fields represented by a FieldList. 
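+// A field with several names (e.g. "A, B string") counts once per name;
+// an unnamed or embedded field counts as one.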
+func (f *FieldList) NumFields() int {
+	n := 0
+	if f != nil {
+		for _, g := range f.List {
+			m := len(g.Names)
+			if m == 0 {
+				m = 1
+			}
+			n += m
+		}
+	}
+	return n
+}
+
+func (x *ArrayType) Pos() token.Pos { return x.Lbrack }
+func (x *ArrayType) End() token.Pos { return x.Elt.End() }
+func (x *ArrayType) exprNode()      {}
+
+func (x *StructType) Pos() token.Pos {
+	if x.Struct.IsValid() {
+		return x.Struct
+	}
+	return x.Fields.Pos()
+}
+func (x *StructType) End() token.Pos { return x.Fields.End() }
+func (x *StructType) exprNode()      {}
+
+func (x *MapType) Pos() token.Pos { return x.Map }
+func (x *MapType) End() token.Pos { return x.Value.End() }
+func (x *MapType) exprNode()      {}
+
+// ----------------------------------------------------------------------------
+// spec
+
+type (
+	// The Spec type stands for any of *ImportSpec or *TypeSpec.
+	Spec interface {
+		Node
+		specNode()
+	}
+
+	// An ImportSpec node represents a single package import.
+	ImportSpec struct {
+		Doc *CommentGroup // associated documentation; or nil
+		//Name *Ident // local package name (including "."); or nil
+		Path    *BasicLit     // import path
+		Comment *CommentGroup // line comments; or nil
+		EndPos  token.Pos     // end of spec (overrides Path.Pos if nonzero)
+	}
+
+	// A TypeSpec node represents a type declaration (TypeSpec production).
+	TypeSpec struct {
+		Doc  *CommentGroup // associated documentation; or nil
+		Name *Ident        // type name
+		//TypeParams *FieldList // type parameters; or nil
+		//Assign token.Pos // position of '=', if any
+		Type    *StructType   // struct body; zero-api type declarations only define struct types
+		Comment *CommentGroup // line comments; or nil
+	}
+)
+
+func (x *ImportSpec) Pos() token.Pos { return x.Path.Pos() }
+func (x *ImportSpec) End() token.Pos {
+	if x.EndPos != 0 {
+		return x.EndPos
+	}
+	return x.Path.End()
+}
+func (x *ImportSpec) specNode() {}
+
+func (x *TypeSpec) Pos() token.Pos { return x.Name.Pos() }
+func (x *TypeSpec) End() token.Pos { return x.Type.End() }
+func (x *TypeSpec) specNode()      {}
+
+// ----------------------------------------------------------------------------
+// decl
+
+type (
+	BadDecl struct {
+		From, To token.Pos
+	}
+
+	SyntaxDecl struct {
+		TokPos     token.Pos
+		Assign     token.Pos
+		SyntaxName *BasicLit
+	}
+
+	GenDecl struct {
+		Doc    *CommentGroup
+		TokPos token.Pos
+		Key    Keyword // import, type
+		Lparen token.Pos
+		Specs  []Spec
+		Rparen token.Pos
+	}
+
+	InfoDecl struct {
+		TokPos   token.Pos
+		Lparen   token.Pos
+		Elements []*KeyValueExpr
+		Rparen   token.Pos
+	}
+)
+
+func (x *BadDecl) Pos() token.Pos { return x.From }
+func (x *BadDecl) End() token.Pos { return x.To }
+func (x *BadDecl) declNode()      {}
+
+func (x *GenDecl) Pos() token.Pos { return x.TokPos }
+func (x *GenDecl) End() token.Pos {
+	if x.Rparen.IsValid() {
+		return x.Rparen + 1
+	}
+	return x.Specs[0].End()
+}
+func (x *GenDecl) declNode() {}
+
+// ----------------------------------------------------------------------------
+// service
+
+type (
+	ServiceDecl struct {
+		ServiceExt *ServiceExtDecl
+		ServiceApi *ServiceApiDecl
+	}
+
+	ServiceExtDecl struct {
+		TokPos token.Pos // @server pos
+		Lparen token.Pos
+		Kvs    []*KeyValueExpr
+		Rparen token.Pos
+	}
+
+	ServiceApiDecl struct {
+		TokPos        token.Pos
+		Name          *Ident
+		Lbrace        token.Pos
+		ServiceRoutes []*ServiceRouteDecl
+		Rbrace        token.Pos
+	}
+
+	ServiceRouteDecl struct {
+		TokPos    token.Pos
+		AtDoc     *KeyValueExpr
+		AtHandler *KeyValueExpr
+		Route     *Route
+	}
+
+	Route struct {
+		Method    *Ident
+		Path      *Ident
+		Req       *ParenExpr
+		ReturnPos token.Pos // returns pos
+		Resp      *ParenExpr
+		EndPos    token.Pos // 
because Resp Req is optional, need this for EndPos + } +) + +func (x *ServiceDecl) Pos() token.Pos { + if x.ServiceExt != nil { + return x.ServiceExt.Pos() + } + return x.ServiceApi.Pos() +} +func (x *ServiceDecl) End() token.Pos { + return x.ServiceApi.End() +} +func (x *ServiceDecl) declNode() {} + +func (x *ServiceExtDecl) Pos() token.Pos { return x.TokPos } +func (x *ServiceExtDecl) token() token.Pos { return x.Rparen } + +func (x *ServiceApiDecl) Pos() token.Pos { return x.TokPos } +func (x *ServiceApiDecl) End() token.Pos { return x.Rbrace } + +// ---------------------------------------------------------------------------- +// File + +type File struct { + Doc *CommentGroup + + SyntaxDecl *SyntaxDecl + ImportDecls []*GenDecl + InfoDecl *InfoDecl + Decls []Decl // top-level declarations; or nil; types or service +} + +// ---------------------------------------------------------------------------- +// keyword + +type Keyword string + +const ( + SYNTAX Keyword = "syntax" + IMPORT Keyword = "import" + INFO Keyword = "info" + TYPE Keyword = "type" + SERVICE Keyword = "service" + SERVEREXT Keyword = "@server" + RouteDoc Keyword = "@doc" + RouteHandler Keyword = "@handler" + RouteReturns Keyword = "returns" +) + +func (k Keyword) Is(str string) bool { + return string(k) == str +} diff --git a/doc/TODO.md b/doc/TODO.md index 5ddad17..305d02e 100644 --- a/doc/TODO.md +++ b/doc/TODO.md @@ -54,4 +54,7 @@ post /foo(req) ``` 这种不会支持,建议直接定义不同的 handler 自行处理 + +# 6. type 定义 group +refer: https://github.com/zeromicro/go-zero/issues/1854 \ No newline at end of file diff --git a/doc/apispec.md b/doc/apispec.md index ae7b386..9ea47a4 100644 --- a/doc/apispec.md +++ b/doc/apispec.md @@ -169,8 +169,8 @@ syntax = "v1" 导出声明用于 当前 API 文件导入其他 API 的时候使用。 ```EBNT -ImportDecl = "import" ( ImportPath | "(" { ImportPath ";" } ")" ) . -ImportPath = string_lit . +ImportDecl = "import" ( ImportSpec | "(" { ImportSpec ";" } ")" ) . +ImportSpec = string_lit . ``` ## 信息声明(Info declaration) 信息声明用于声明 API 的一些额外信息。 diff --git a/parser/error.go b/parser/error.go new file mode 100644 index 0000000..417aa02 --- /dev/null +++ b/parser/error.go @@ -0,0 +1,42 @@ +package parser + +import "github.com/zeromicro/zero-api/token" + +type bailout struct { +} + +func (p *parser) error(pos token.Pos, msg string) { + if p.trace { + defer un(trace(p, "error: "+msg)) + } + + position := p.file.Position(pos) + if p.mode&AllErrors == 0 { + n := len(p.errors) + if n > 0 && p.errors[n-1].Pos.Line == position.Line { + return + } + if n > 10 { + panic(bailout{}) + } + } + p.errors.Add(position, msg) +} + +func (p *parser) errorExpected(pos token.Pos, msg string) { + msg = "expected " + msg + if pos == p.pos { + // the error happened at the current position; + // make the error message more specific + switch { + case p.tok == token.SEMICOLON && p.lit == "\n": + msg += ", found newline" + case p.tok.IsLiteral(): + // print 123 rather than 'INT', etc. 
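+			// e.g. a failed expect(token.SEMICOLON) before the literal "v1"
+			// yields: expected ';', found "v1"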
+ msg += ", found " + p.lit + default: + msg += ", found '" + p.tok.String() + "'" + } + } + p.error(pos, msg) +} diff --git a/parser/example_test.go b/parser/example_test.go new file mode 100644 index 0000000..f0d0d8c --- /dev/null +++ b/parser/example_test.go @@ -0,0 +1,25 @@ +package parser_test + +import ( + "fmt" + + "github.com/zeromicro/zero-api/parser" + "github.com/zeromicro/zero-api/token" +) + +func ExampleParseFile() { + fset := token.NewFileSet() + + src := `// api语法版本 +syntax = "v1" +` + f, err := parser.ParseFile(fset, "", src, parser.AllErrors) + if err != nil { + fmt.Println(err) + } + + fmt.Println(f.SyntaxDecl.SyntaxName.Value) + + // output: + // "v1" +} diff --git a/parser/interface.go b/parser/interface.go new file mode 100644 index 0000000..4e495a6 --- /dev/null +++ b/parser/interface.go @@ -0,0 +1,65 @@ +package parser + +import ( + "errors" + "io" + "os" + + "github.com/zeromicro/zero-api/ast" + "github.com/zeromicro/zero-api/token" +) + +type Mode uint + +const ( + iotaMode Mode = 1 << iota + ParseComments + Trace + AllErrors +) + +func readSource(filename string, src interface{}) ([]byte, error) { + if src != nil { + switch s := src.(type) { + case string: + return []byte(s), nil + case []byte: + return s, nil + case io.Reader: + return io.ReadAll(s) + } + return nil, errors.New("invalid source") + } + return os.ReadFile(filename) +} + +func ParseFile(fset *token.FileSet, filename string, src interface{}, mode Mode) (f *ast.File, err error) { + if fset == nil { + panic("parser.ParseFile: not token.FileSet provided (fset == nil)") + } + + text, err := readSource(filename, src) + if err != nil { + return nil, err + } + + var p parser + defer func() { + if e := recover(); err != nil { + if _, ok := e.(bailout); ok { + panic(e) + } + } + + if f == nil { + f = &ast.File{} + } + + p.errors.Sort() + err = p.errors.Err() + }() + + p.init(fset, filename, text, mode) + f = p.parseFile() + return +} diff --git a/parser/parse.go b/parser/parse.go new file mode 100644 index 0000000..7fdfa76 --- /dev/null +++ b/parser/parse.go @@ -0,0 +1,523 @@ +package parser + +import ( + "github.com/zeromicro/zero-api/ast" + "github.com/zeromicro/zero-api/token" +) + +func (p *parser) parseSyntaxDecl() *ast.SyntaxDecl { + if p.trace { + defer un(trace(p, "Syntax")) + } + + pos := p.expect(token.IDENT) + assignPos := p.expect(token.ASSIGN) + + namePos := p.pos + var name string + + if p.tok == token.STRING { + name = p.lit + } + p.expect(token.STRING) + p.expectSemi() + + return &ast.SyntaxDecl{ + TokPos: pos, + Assign: assignPos, + SyntaxName: &ast.BasicLit{ValuePos: namePos, Kind: token.STRING, Value: name}, + } +} + +func (p *parser) parseInfoDecl() *ast.InfoDecl { + if p.trace { + defer un(trace(p, "Info")) + } + pos := p.expect(token.IDENT) + lparen := p.expect(token.LPAREN) + elements := p.parseElementList() + rparen := p.expect(token.RPAREN) + p.expectSemi() + + return &ast.InfoDecl{ + TokPos: pos, + Lparen: lparen, + Elements: elements, + Rparen: rparen, + } +} + +func (p *parser) parseElementList() []*ast.KeyValueExpr { + if p.trace { + defer un(trace(p, "ElementList")) + } + + var kvs []*ast.KeyValueExpr + for p.tok != token.RPAREN && p.tok != token.EOF { + kvs = append(kvs, p.parseElement(true)) + p.expectSemi() + } + return kvs +} + +func (p *parser) parseElement(expectColon bool) *ast.KeyValueExpr { + if p.trace { + defer un(trace(p, "Element")) + } + + key := p.parseIdent(false) + var colonPos token.Pos + if expectColon { + colonPos = p.expect(token.COLON) + } + value := 
&ast.BasicLit{ + ValuePos: p.pos, + Kind: p.tok, + Value: p.lit, + } + p.next() + + return &ast.KeyValueExpr{ + Key: key, + Colon: colonPos, + Value: value, + } +} + +type parseSpecFunction func(doc *ast.CommentGroup) ast.Spec + +func (p *parser) parseGenDecl(key ast.Keyword, f parseSpecFunction) *ast.GenDecl { + if p.trace { + defer un(trace(p, "GenDecl("+string(key)+")")) + } + + doc := p.leadComment + pos := p.expect(token.IDENT) // import + var lparen, rparen token.Pos + var list []ast.Spec + if p.tok == token.LPAREN { + lparen = p.pos + p.next() + for iota := 0; p.tok != token.RPAREN && p.tok != token.EOF; iota++ { + list = append(list, f(p.leadComment)) + } + rparen = p.expect(token.RPAREN) + p.expectSemi() + } else { + list = append(list, f(doc)) + } + return &ast.GenDecl{ + Doc: doc, + TokPos: pos, + Key: key, + Lparen: lparen, + Specs: list, + Rparen: rparen, + } +} + +func (p *parser) parseImportSpec(doc *ast.CommentGroup) ast.Spec { + if p.trace { + defer un(trace(p, "Import")) + } + pos := p.pos + var path string + if p.tok == token.STRING { + path = p.lit + } + p.expect(token.STRING) + p.expectSemi() + + return &ast.ImportSpec{ + Doc: doc, + Path: &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: path}, + Comment: p.lineComment, + EndPos: 0, + } +} + +func (p *parser) parseIdent(identifier bool) *ast.Ident { + if p.trace { + defer un(trace(p, "Ident")) + } + pos := p.pos + var name string + if p.tok == token.IDENT { + name = p.lit + p.next() + if identifier && !token.IsIdentifier(name) { + p.error(pos, "expect Identifier") + } + } else { + name = "_" + p.expect(token.IDENT) // use expect() error handling + } + + return &ast.Ident{ + NamePos: pos, + Name: name, + } +} + +// ---------------------------------------------------------------------------- +// decl + +func (p *parser) parseDecl() ast.Decl { + if p.trace { + defer un(trace(p, "Decl")) + } + + if p.tok != token.IDENT { + pos := p.pos + p.errorExpected(pos, "expect declaration") + p.advance() + return &ast.BadDecl{ + From: pos, + To: p.pos, + } + } + + switch { + case ast.TYPE.Is(p.lit): + return p.parseGenDecl(ast.TYPE, p.parseTypeSpec) + case ast.SERVICE.Is(p.lit), ast.SERVEREXT.Is(p.lit): + return p.parseService() + } + + pos := p.pos + p.errorExpected(pos, "expect declaration") + p.advance() + return &ast.BadDecl{ + From: pos, + To: p.pos, + } +} + +// ---------------------------------------------------------------------------- +// struct + +func (p *parser) parseTypeSpec(doc *ast.CommentGroup) ast.Spec { + if p.trace { + defer un(trace(p, "TypeSpec")) + } + + ident := p.parseIdent(true) + ty := p.parseStructType() // type spec only support structType + p.expectSemi() + + return &ast.TypeSpec{ + Doc: doc, + Name: ident, + Type: ty, + Comment: p.lineComment, + } +} + +func (p *parser) parseStructType() *ast.StructType { + if p.trace { + defer un(trace(p, "StructType")) + } + + var structPos, lbrace token.Pos + if p.tok == token.LBRACE { + lbrace = p.expect(token.LBRACE) + } else if p.tok == token.IDENT && p.lit == "struct" { + structPos = p.expect(token.IDENT) + lbrace = p.expect(token.LBRACE) + } + var list []*ast.Field + for p.tok == token.IDENT || p.tok == token.MUL || p.tok == token.LPAREN { + list = append(list, p.parseFieldDecl()) + } + rbrace := p.expect(token.RBRACE) + + return &ast.StructType{ + Struct: structPos, + Fields: &ast.FieldList{ + Lbrace: lbrace, + List: list, + Rbrace: rbrace, + }, + } +} + +func (p *parser) parseFieldDecl() *ast.Field { + if p.trace { + defer un(trace(p, "FieldDecl")) + } + 
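+	// The parser accepts three shapes of field declaration:
+	//
+	//	Name1, Name2 Type `tag` // one or more names, a type, an optional tag
+	//	TypeName                // embedded field: a bare type name
+	//	map[string]string       // unnamed type literal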
+ doc := p.leadComment + + var names []*ast.Ident + var typ ast.Expr + if p.tok == token.IDENT { + name := p.parseIdent(true) + if p.tok == token.STRING || p.tok == token.SEMICOLON || p.tok == token.RBRACE { + typ = name + } else { + names := []*ast.Ident{name} + for p.tok == token.COMMA { + p.next() + names = append(names, p.parseIdent(true)) + } + typ = p.parseType() + } + } else { + /* type User { map[string]string } */ + typ = p.parseType() + } + + var tag *ast.BasicLit + if p.tok == token.STRING { + tag = &ast.BasicLit{ + ValuePos: p.pos, + Kind: p.tok, + Value: p.lit, + } + p.next() + } + + p.expectSemi() + return &ast.Field{ + Doc: doc, + Names: names, + Type: typ, + Tag: tag, + Comment: p.lineComment, + } +} + +func (p *parser) parseMapType() *ast.MapType { + if p.trace { + defer un(trace(p, "MapType")) + } + + pos := p.expect(token.IDENT) // map + p.expect(token.LBRACK) + key := p.parseType() + p.expect(token.RBRACK) + value := p.parseType() + + return &ast.MapType{ + Map: pos, + Key: key, + Value: value, + } +} + +func (p *parser) parseArrayType() *ast.ArrayType { + if p.trace { + defer un(trace(p, "ArrayType")) + } + + lbrack := p.expect(token.LBRACK) + p.expect(token.RBRACK) + elt := p.parseType() + return &ast.ArrayType{ + Lbrack: lbrack, + //Len: nil, + Elt: elt, + } +} + +func (p *parser) parsePointerType() *ast.StarExpr { + if p.trace { + defer un(trace(p, "PointerType")) + } + + star := p.expect(token.MUL) + base := p.parseType() + + return &ast.StarExpr{ + Star: star, + X: base, + } +} + +func (p *parser) parseParenExpr() *ast.ParenExpr { + if p.trace { + defer un(trace(p, "ParenExpr")) + } + + lparen := p.expect(token.LPAREN) + typ := p.parseType() + rparen := p.expect(token.RPAREN) + return &ast.ParenExpr{ + Lparen: lparen, + X: typ, + Rparen: rparen, + } +} + +func (p *parser) parseType() ast.Expr { + if p.trace { + defer un(trace(p, "Type")) + } + + typ := p.tryIdentOrType() + + if typ == nil { + pos := p.pos + p.errorExpected(pos, "type") + p.advance() + return &ast.BadExpr{From: pos, To: p.pos} + } + return typ +} + +func (p *parser) tryIdentOrType() ast.Expr { + switch p.tok { + case token.IDENT: + switch p.lit { + case "struct": + return p.parseStructType() + case "map": + return p.parseMapType() + default: + return p.parseIdent(true) + } + case token.LBRACE: + return p.parseStructType() + case token.LBRACK: + return p.parseArrayType() + case token.MUL: + return p.parsePointerType() + case token.LPAREN: + return p.parseParenExpr() + } + return nil +} + +// ---------------------------------------------------------------------------- +// service + +func (p *parser) parseService() ast.Decl { + if p.trace { + defer un(trace(p, "ServiceDecl")) + } + + var serviceExt *ast.ServiceExtDecl + if ast.SERVEREXT.Is(p.lit) { + serviceExt = p.parseServiceExtDecl() + } + if !ast.SERVICE.Is(p.lit) { + pos := p.pos + p.errorExpected(pos, "service") + p.advance() + return &ast.BadDecl{ + From: pos, + To: p.pos, + } + } + serviceApi := p.parseServiceApiDecl() + p.expectSemi() + return &ast.ServiceDecl{ + ServiceExt: serviceExt, + ServiceApi: serviceApi, + } +} + +func (p *parser) parseServiceExtDecl() *ast.ServiceExtDecl { + if p.trace { + defer un(trace(p, "ServiceExtDecl")) + } + pos := p.expect(token.IDENT) + lparen := p.expect(token.LPAREN) + kvs := p.parseElementList() + rparen := p.expect(token.RPAREN) + p.expectSemi() + + return &ast.ServiceExtDecl{ + TokPos: pos, + Lparen: lparen, + Kvs: kvs, + Rparen: rparen, + } +} + +func (p *parser) parseServiceApiDecl() 
*ast.ServiceApiDecl { + if p.trace { + defer un(trace(p, "ServiceApiDecl")) + } + pos := p.expect(token.IDENT) + name := p.parseIdent(false) + lbrace := p.expect(token.LBRACE) + serviceRoutes := p.parseServiceRouteList() + rbrace := p.expect(token.RBRACE) + + return &ast.ServiceApiDecl{ + TokPos: pos, + Name: name, + Lbrace: lbrace, + ServiceRoutes: serviceRoutes, + Rbrace: rbrace, + } +} + +func (p *parser) parseServiceRouteList() []*ast.ServiceRouteDecl { + if p.trace { + defer un(trace(p, "ServiceRouteList")) + } + + var routes []*ast.ServiceRouteDecl + for p.tok != token.RBRACE && p.tok != token.EOF { + routes = append(routes, p.parseServiceRouteDecl()) + } + return routes +} + +func (p *parser) parseServiceRouteDecl() *ast.ServiceRouteDecl { + if p.trace { + defer un(trace(p, "ServiceRouteDecl")) + } + + pos := p.pos + var atDoc, atHandler *ast.KeyValueExpr + for p.tok == token.IDENT { + if ast.RouteDoc.Is(p.lit) { + atDoc = p.parseElement(false) + } else if ast.RouteHandler.Is(p.lit) { + atHandler = p.parseElement(false) + } else { + break + } + p.expectSemi() + } + route := p.parseRoute() + return &ast.ServiceRouteDecl{ + TokPos: pos, + AtDoc: atDoc, + AtHandler: atHandler, + Route: route, + } +} + +func (p *parser) parseRoute() *ast.Route { + if p.trace { + defer un(trace(p, "Route")) + } + method := p.parseIdent(true) + path := p.parseIdent(false) + + var req, resp *ast.ParenExpr + if p.tok == token.LPAREN { + req = p.parseParenExpr() + } + + var returns token.Pos + if p.tok == token.IDENT && ast.RouteReturns.Is(p.lit) { + returns = p.expect(token.IDENT) + resp = p.parseParenExpr() + } + end := p.pos + p.expectSemi() + + return &ast.Route{ + Method: method, + Path: path, + Req: req, + ReturnPos: returns, + Resp: resp, + EndPos: end, + } +} diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..36c9465 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,214 @@ +package parser + +import ( + "github.com/zeromicro/zero-api/ast" + "github.com/zeromicro/zero-api/scanner" + "github.com/zeromicro/zero-api/token" +) + +type ( + parser struct { + file *token.File + errors scanner.ErrorList + scanner scanner.Scanner + + mode Mode // + trace bool // == (mode&Trace != 0) + indent int // indentation used for tracing output + + comments []*ast.CommentGroup + leadComment *ast.CommentGroup + lineComment *ast.CommentGroup + + pos token.Pos + tok token.Token + lit string + } +) + +func (p *parser) init(fset *token.FileSet, filename string, src []byte, mode Mode) { + p.file = fset.AddFile(filename, -1, len(src)) + var m scanner.Mode + if mode&ParseComments != 0 { + m = scanner.ScanComments + } + + eh := func(pos token.Position, msg string) { p.errors.Add(pos, msg) } + p.scanner.Init(p.file, src, eh, m) + + p.mode = mode + p.trace = mode&Trace != 0 + p.next() +} + +// ---------------------------------------------------------------------------- +// next + +func (p *parser) next() { + p.leadComment = nil + p.lineComment = nil + prev := p.pos + p.next0() + + if p.tok == token.COMMENT { + var comment *ast.CommentGroup + var endline int + + if p.file.Line(p.pos) == p.file.Line(prev) { + // The comment is on same line as the previous token; it + // cannot be a lead comment but may be a line comment. + comment, endline = p.consumeCommentGroup(0) + if p.file.Line(p.pos) != endline || p.tok == token.EOF { + // The next token is on a different line, thus + // the last comment group is a line comment. 
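+				// For example, in "Name string // note" the trailing comment
+				// is recorded here and later attached to that field.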
+ p.lineComment = comment + } + } + + // consume successor comments, if any + endline = -1 + for p.tok == token.COMMENT { + comment, endline = p.consumeCommentGroup(1) + } + + if endline+1 == p.file.Line(p.pos) { + // The next token is following on the line immediately after the + // comment group, thus the last comment group is a lead comment. + p.leadComment = comment + } + } +} + +func (p *parser) next0() { + if p.trace && p.pos.IsValid() { + s := p.tok.String() + switch { + case p.tok.IsLiteral(): + p.printTrace(s, p.lit) + default: + p.printTrace(s) + } + } + + p.pos, p.tok, p.lit = p.scanner.Scan() +} + +// ---------------------------------------------------------------------------- +// comment + +func (p *parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) { + var list []*ast.Comment + endline = p.file.Line(p.pos) + for p.tok == token.COMMENT && p.file.Line(p.pos) <= endline+n { + var comment *ast.Comment + comment, endline = p.consumeComment() + list = append(list, comment) + } + + comments = &ast.CommentGroup{ + List: list, + } + p.comments = append(p.comments, comments) + return +} + +func (p *parser) consumeComment() (*ast.Comment, int) { + endline := p.file.Line(p.pos) + if p.lit[1] == '*' { + // don't use range here - no need to decode Unicode code points + for i := 0; i < len(p.lit); i++ { + if p.lit[i] == '\n' { + endline++ + } + } + } + comment := &ast.Comment{ + Slash: p.pos, + Text: p.lit, + } + p.next0() + return comment, endline +} + +func (p *parser) expect(tok token.Token) token.Pos { + pos := p.pos + if p.tok != tok { + p.errorExpected(pos, "'"+tok.String()+"'") + } + p.next() + return pos +} + +func (p *parser) expectSemi() { + if p.tok != token.RPAREN && p.tok != token.RBRACE { + switch p.tok { + case token.COMMA: + p.errorExpected(p.pos, `";"`) + fallthrough + case token.SEMICOLON: + p.next() + default: + p.errorExpected(p.pos, `";"`) + p.advance() + } + } +} + +func (p *parser) advance() { + for ; p.tok != token.EOF; p.next() { + if p.tok == token.IDENT { + if p.lit == "import" || p.lit == "type" || p.lit == "info" || p.lit == "service" || p.lit == "@server" { + return + } + } + } +} + +// ---------------------------------------------------------------------------- +// parse + +func (p *parser) parseFile() *ast.File { + if p.trace { + defer un(trace(p, "File")) + } + + if p.errors.Len() != 0 { + return nil + } + + doc := p.leadComment + var syntax *ast.SyntaxDecl + if p.tok == token.IDENT && ast.SYNTAX.Is(p.lit) { + syntax = p.parseSyntaxDecl() + } else { + // TODO: default syntax + } + + var imports []*ast.GenDecl + var info *ast.InfoDecl + for p.tok == token.IDENT && (ast.IMPORT.Is(p.lit) || ast.INFO.Is(p.lit)) { + if ast.INFO.Is(p.lit) { + info = p.parseInfoDecl() + } else if ast.IMPORT.Is(p.lit) { + imports = append(imports, p.parseGenDecl(ast.IMPORT, p.parseImportSpec)) // parse import + } + } + + // type or service + var decls []ast.Decl + for p.tok != token.EOF { + decls = append(decls, p.parseDecl()) + } + + f := &ast.File{ + Doc: doc, + SyntaxDecl: syntax, + ImportDecls: imports, + InfoDecl: info, + Decls: decls, + } + + // TODO: resolveFile + + return f +} diff --git a/parser/parser_test.go b/parser/parser_test.go new file mode 100644 index 0000000..fbde027 --- /dev/null +++ b/parser/parser_test.go @@ -0,0 +1,20 @@ +package parser + +import ( + "testing" + + "github.com/zeromicro/zero-api/token" +) + +var validFiles = []string{ + "../testdata/demo.api", +} + +func TestParse(t *testing.T) { + for _, name := range validFiles { + 
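+		// testdata/demo.api exercises syntax, info, imports, type
+		// declarations, and an @server-annotated service block.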
_, err := ParseFile(token.NewFileSet(), name, nil, AllErrors) + if err != nil { + t.Fatalf("ParseFile(%s): %v", name, err) + } + } +} diff --git a/parser/trace.go b/parser/trace.go new file mode 100644 index 0000000..be7175e --- /dev/null +++ b/parser/trace.go @@ -0,0 +1,29 @@ +package parser + +import "fmt" + +func (p *parser) printTrace(a ...interface{}) { + const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + const n = len(dots) + pos := p.file.Position(p.pos) + fmt.Printf("%5d:%3d: ", pos.Line, pos.Column) + i := 2 * p.indent + for i > n { + fmt.Print(dots) + i -= n + } + // i <= n + fmt.Print(dots[0:i]) + fmt.Println(a...) +} + +func trace(p *parser, msg string) *parser { + p.printTrace(msg, "(") + p.indent++ + return p +} + +func un(p *parser) { + p.indent-- + p.printTrace(")") +} diff --git a/scanner/example_test.go b/scanner/example_test.go index 6445736..3ac5484 100644 --- a/scanner/example_test.go +++ b/scanner/example_test.go @@ -28,7 +28,7 @@ func ExampleScanner_Scan() { // output: // 1:1 IDENT "post" - // 1:6 IDENT " /foo" + // 1:6 IDENT "/foo" // 1:11 ( "" // 1:12 IDENT "Foo" // 1:15 ) "" diff --git a/scanner/scanner.go b/scanner/scanner.go index 6198307..484c23b 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -178,9 +178,9 @@ scanAgain: tok = token.COMMENT lit = comment default: + insertSemi = true lit = s.scanIdentifier(true, offs) tok = token.IDENT - insertSemi = s.insertSemi } case '\n': s.insertSemi = false @@ -219,10 +219,12 @@ scanAgain: tok = token.RBRACE case '~': tok = token.TILDE + case '*': + tok = token.MUL default: + insertSemi = true lit = s.scanIdentifier(false, offs) tok = token.IDENT - insertSemi = s.insertSemi } if s.mode&dontInsertSemis == 0 { diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 967a351..64213e2 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -684,16 +684,12 @@ func BenchmarkScan(b *testing.B) { func BenchmarkScanFiles(b *testing.B) { // Scan a few arbitrary large files, and one small one, to provide some // variety in benchmarks. - // TODO: change *.go to *.api for _, p := range []string{ - "go/types/expr.go", - "go/parser/parser.go", - "net/http/server.go", - "go/scanner/errors.go", + "testdata/demo.api", } { b.Run(p, func(b *testing.B) { b.StopTimer() - filename := filepath.Join("..", "..", filepath.FromSlash(p)) + filename := filepath.Join("..", filepath.FromSlash(p)) src, err := os.ReadFile(filename) if err != nil { b.Fatal(err) diff --git a/testdata/demo.api b/testdata/demo.api new file mode 100644 index 0000000..48be1ce --- /dev/null +++ b/testdata/demo.api @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 The Go-zero Authors (go-zero.dev). All rights reserved. + */ + +// api语法版本 +syntax = "v1" + +info( + author: "dylan" + date: "2022-01-28" + desc: "api 语法 demo" +) + +import "a/b.api" + +import( + "c.api" + "d/d.api" +) + +type ( + User { // User for 注释 + Name string + } +) + +type Foo { + ID int64 `json:"id"` +} + +@server( + auth: dylan + timeout: 3s +) +service foo-api { + @doc ping-doc + @handler ping + get /ping + + @doc "foo" + @handler bar + post /bar/:id (Foo) returns (User) +} \ No newline at end of file diff --git a/token/token.go b/token/token.go index 9e311bb..0fba5ce 100644 --- a/token/token.go +++ b/token/token.go @@ -6,6 +6,8 @@ import ( "unicode" ) +const NoPos Pos = 0 + // Token is the set of lexical tokens of the zero-api programming language. 
type Token token.Token @@ -24,6 +26,8 @@ const ( literal_end operator_beg + MUL // * + ASSIGN // = LPAREN // ( LBRACK // [ diff --git a/token/token_test.go b/token/token_test.go index a578e67..fb9ca54 100644 --- a/token/token_test.go +++ b/token/token_test.go @@ -12,7 +12,7 @@ func TestIsIdentifier(t *testing.T) { {"Space", " ", false}, {"SpaceSuffix", "foo ", false}, {"Number", "123", false}, - {"Keyword", "func", false}, + {"Keyword", "func", true}, {"LettersASCII", "foo", true}, {"MixedASCII", "_bar123", true}, From ec42e5ca681d2b7db214cdcd2e9899bb250ff0f4 Mon Sep 17 00:00:00 2001 From: dylan Date: Thu, 16 Jun 2022 00:32:15 +0800 Subject: [PATCH 4/5] feat: add api fmt cmd --- ast/ast.go | 27 +- cmd/apifmt.go | 142 +++++ format/format.go | 28 + format/internal.go | 27 + parser/parse.go | 2 +- parser/parser.go | 15 +- printer/nodes.go | 293 ++++++++++ printer/printer.go | 1392 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 1917 insertions(+), 9 deletions(-) create mode 100644 cmd/apifmt.go create mode 100644 format/format.go create mode 100644 format/internal.go create mode 100644 printer/nodes.go create mode 100644 printer/printer.go diff --git a/ast/ast.go b/ast/ast.go index 38e04eb..43fe4de 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -293,6 +293,10 @@ func (x *BadDecl) Pos() token.Pos { return x.From } func (x *BadDecl) End() token.Pos { return x.To } func (x *BadDecl) declNode() {} +func (x *SyntaxDecl) Pos() token.Pos { return x.TokPos } +func (x *SyntaxDecl) End() token.Pos { return x.SyntaxName.End() } +func (x *SyntaxDecl) declNode() {} + func (x *GenDecl) Pos() token.Pos { return x.TokPos } func (x *GenDecl) End() token.Pos { if x.Rparen.IsValid() { @@ -302,6 +306,9 @@ func (x *GenDecl) End() token.Pos { } func (x *GenDecl) declNode() {} +func (x *InfoDecl) Pos() token.Pos { return x.TokPos } +func (x *InfoDecl) End() token.Pos { return x.Rparen + 1 } + // ---------------------------------------------------------------------------- // service @@ -360,6 +367,9 @@ func (x *ServiceExtDecl) token() token.Pos { return x.Rparen } func (x *ServiceApiDecl) Pos() token.Pos { return x.TokPos } func (x *ServiceApiDecl) End() token.Pos { return x.Rbrace } +func (x *Route) Pos() token.Pos { return x.Method.Pos() } +func (x *Route) End() token.Pos { return x.EndPos } + // ---------------------------------------------------------------------------- // File @@ -369,7 +379,22 @@ type File struct { SyntaxDecl *SyntaxDecl ImportDecls []*GenDecl InfoDecl *InfoDecl - Decls []Decl // top-level declarations; or nil; types or service + Decls []Decl // top-level declarations; or nil; types(GenDecl) or service(ServiceDecl) + + Comments []*ast.CommentGroup +} + +func (x *File) Pos() token.Pos { + if x.SyntaxDecl != nil { + return x.SyntaxDecl.Pos() + } + return 0 +} +func (x *File) End() token.Pos { + if n := len(x.Decls); n > 0 { + return x.Decls[n-1].End() + } + return token.NoPos } // ---------------------------------------------------------------------------- diff --git a/cmd/apifmt.go b/cmd/apifmt.go new file mode 100644 index 0000000..2e9bc12 --- /dev/null +++ b/cmd/apifmt.go @@ -0,0 +1,142 @@ +package main + +import ( + "errors" + "flag" + "fmt" + "io" + "io/fs" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "strings" + + "github.com/zeromicro/zero-api/format" +) + +var ( + write = flag.Bool("w", false, "write result to (source) file instead of stdout") +) + +func usage() { + fmt.Fprint(os.Stderr, "usage: apifmt [flags] [path ...]\n") + flag.PrintDefaults() +} + +func report(err 
error) {
+	fmt.Println(err)
+	os.Exit(2)
+}
+
+func processFile(fileName string, info fs.FileInfo, in io.Reader, out io.Writer) error {
+	if in == nil {
+		var err error
+		in, err = os.Open(fileName)
+		if err != nil {
+			return err
+		}
+	}
+
+	src, err := ioutil.ReadAll(in)
+	if err != nil {
+		return err
+	}
+
+	res, err := format.Source(src, fileName)
+	if err != nil {
+		return err
+	}
+
+	if !*write {
+		_, err := out.Write(res)
+		return err
+	}
+	// write the result back to the source file, via a backup copy
+	perm := info.Mode().Perm()
+	backName, err := backupFile(fileName+".", src, perm)
+	if err != nil {
+		return err
+	}
+
+	err = os.WriteFile(fileName, res, perm)
+	if err != nil {
+		_ = os.Rename(backName, fileName)
+		return err
+	}
+
+	err = os.Remove(backName)
+	return err
+}
+
+const chmodSupported = runtime.GOOS != "windows"
+
+func backupFile(filename string, data []byte, perm fs.FileMode) (string, error) {
+	f, err := os.CreateTemp(filepath.Dir(filename), filepath.Base(filename))
+	if err != nil {
+		return "", err
+	}
+	backname := f.Name()
+	if chmodSupported {
+		err := f.Chmod(perm)
+		if err != nil {
+			_ = f.Close()
+			_ = os.Remove(backname)
+			return backname, err
+		}
+	}
+
+	_, err = f.Write(data)
+	if err1 := f.Close(); err1 != nil {
+		err = err1
+	}
+	return backname, err
+}
+
+func main() {
+	flag.Usage = usage
+	flag.Parse()
+
+	args := flag.Args()
+	if len(args) == 0 {
+		if *write {
+			report(errors.New("error: cannot use -w with standard input"))
+			return
+		}
+		// -w was rejected above, so processFile never writes back and
+		// fs.FileInfo can be nil.
+		if err := processFile("", nil, os.Stdin, os.Stdout); err != nil {
+			report(err)
+		}
+		return
+	}
+
+	for _, path := range args {
+		walkSubDir := strings.HasSuffix(path, "/...")
+		if walkSubDir {
+			// strip the "..." but keep the trailing "/" so WalkDir
+			// still receives the directory itself
+			path = path[:len(path)-len("...")]
+		}
+		filepath.WalkDir(path, func(path string, d fs.DirEntry, err error) error {
+			if err != nil {
+				fmt.Fprintln(os.Stderr, err)
+			} else if d.IsDir() {
+				if !walkSubDir {
+					return filepath.SkipDir
+				}
+			} else {
+				ext := filepath.Ext(path)
+				if ext != ".api" {
+					return nil
+				}
+
+				info, err := d.Info()
+				if err != nil {
+					return err
+				}
+				if err := processFile(path, info, nil, os.Stdout); err != nil {
+					report(err)
+				}
+			}
+			return err
+		})
+	}
+}
diff --git a/format/format.go b/format/format.go
new file mode 100644
index 0000000..d4e749a
--- /dev/null
+++ b/format/format.go
@@ -0,0 +1,28 @@
+package format
+
+import (
+	"github.com/zeromicro/zero-api/parser"
+	"github.com/zeromicro/zero-api/printer"
+	"github.com/zeromicro/zero-api/token"
+)
+
+var config = printer.Config{
+	Mode:     printer.UseSpaces | printer.TabIndent,
+	Tabwidth: 8,
+}
+
+const parseMode = parser.ParseComments
+
+func Source(src []byte, filename ...string) ([]byte, error) {
+	var fname string
+	if len(filename) > 0 {
+		fname = filename[0]
+	}
+
+	fset := token.NewFileSet()
+	file, err := parse(fset, fname, src)
+	if err != nil {
+		return nil, err
+	}
+	return format(fset, file, config)
+}
diff --git a/format/internal.go b/format/internal.go
new file mode 100644
index 0000000..d8410dc
--- /dev/null
+++ b/format/internal.go
@@ -0,0 +1,27 @@
+package format
+
+import (
+	"bytes"
+
+	"github.com/zeromicro/zero-api/printer"
+
+	"github.com/zeromicro/zero-api/ast"
+	"github.com/zeromicro/zero-api/parser"
+	"github.com/zeromicro/zero-api/token"
+)
+
+func parse(fset *token.FileSet, filename string, src []byte) (file *ast.File, err error) {
+	return parser.ParseFile(fset, filename, src, parseMode)
+}
+
+func format(
+	fset *token.FileSet,
+	file *ast.File,
+	cfg printer.Config) ([]byte, error) {
+	var buf 
bytes.Buffer + err := cfg.Fprint(&buf, fset, file) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} diff --git a/parser/parse.go b/parser/parse.go index 7fdfa76..b932870 100644 --- a/parser/parse.go +++ b/parser/parse.go @@ -255,7 +255,7 @@ func (p *parser) parseFieldDecl() *ast.Field { if p.tok == token.STRING || p.tok == token.SEMICOLON || p.tok == token.RBRACE { typ = name } else { - names := []*ast.Ident{name} + names = []*ast.Ident{name} for p.tok == token.COMMA { p.next() names = append(names, p.parseIdent(true)) diff --git a/parser/parser.go b/parser/parser.go index 36c9465..f377f0a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -184,14 +184,14 @@ func (p *parser) parseFile() *ast.File { // TODO: default syntax } - var imports []*ast.GenDecl var info *ast.InfoDecl - for p.tok == token.IDENT && (ast.IMPORT.Is(p.lit) || ast.INFO.Is(p.lit)) { - if ast.INFO.Is(p.lit) { - info = p.parseInfoDecl() - } else if ast.IMPORT.Is(p.lit) { - imports = append(imports, p.parseGenDecl(ast.IMPORT, p.parseImportSpec)) // parse import - } + if p.tok == token.IDENT && ast.INFO.Is(p.lit) { + info = p.parseInfoDecl() + } + + var imports []*ast.GenDecl + for p.tok == token.IDENT && ast.IMPORT.Is(p.lit) { + imports = append(imports, p.parseGenDecl(ast.IMPORT, p.parseImportSpec)) // parse import } // type or service @@ -206,6 +206,7 @@ func (p *parser) parseFile() *ast.File { ImportDecls: imports, InfoDecl: info, Decls: decls, + Comments: p.comments, } // TODO: resolveFile diff --git a/printer/nodes.go b/printer/nodes.go new file mode 100644 index 0000000..499e06e --- /dev/null +++ b/printer/nodes.go @@ -0,0 +1,293 @@ +package printer + +import ( + "github.com/zeromicro/zero-api/ast" + "github.com/zeromicro/zero-api/token" +) + +// setComment sets g as the next comment if g != nil and if node comments +// are enabled - this mode is used when printing source code fragments such +// as exports only. It assumes that there is no pending comment in p.comments +// and at most one pending comment in the p.comment cache. 
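+// The comment pipeline below is adapted from go/printer; the invariants
+// described here are inherited from that implementation.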
+func (p *printer) setComment(g *ast.CommentGroup) { + if g == nil || !p.useNodeComments { + return + } + if p.comments == nil { + // initialize p.comments lazily + p.comments = make([]*ast.CommentGroup, 1) + } else if p.cindex < len(p.comments) { + // for some reason there are pending comments; this + // should never happen - handle gracefully and flush + // all comments up to g, ignore anything after that + p.flush(p.posFor(g.List[0].Pos()), token.ILLEGAL) + p.comments = p.comments[0:1] + // in debug mode, report error + p.internalError("setComment found pending comments") + } + p.comments[0] = g + p.cindex = 0 + // don't overwrite any pending comment in the p.comment cache + // (there may be a pending comment when a line comment is + // immediately followed by a lead comment with no other + // tokens between) + if p.commentOffset == infinity { + p.nextComment() // get comment ready for use + } +} + +func (p *printer) file(node *ast.File) { + p.setComment(node.Doc) + + p.syntax(node.SyntaxDecl) + p.infoDecl(node.InfoDecl) + p.importDecls(node.ImportDecls) + p.declList(node.Decls) + + p.print(newline) +} + +func (p *printer) syntax(node *ast.SyntaxDecl) { + if node == nil { + return + } + p.print(node.Pos(), ast.SYNTAX, blank) + p.print(node.Assign, token.ASSIGN, blank) + p.expr(node.SyntaxName) + p.print(newline) +} + +func (p *printer) infoDecl(node *ast.InfoDecl) { + if node == nil { + return + } + if len(p.output) > 0 { + p.print(newline) + } + + p.print(node.Pos(), ast.INFO, blank, token.LPAREN, newline) + p.print(indent) + for _, each := range node.Elements { + p.expr(each) + p.print(newline) + } + p.print(unindent, token.RPAREN, newline) +} + +func (p *printer) importDecls(nodes []*ast.GenDecl) { + for _, each := range nodes { + if len(p.output) > 0 { + p.print(newline) + } + p.genDecl(each) + } +} + +// genDecl for import or type +func (p *printer) genDecl(node *ast.GenDecl) { + p.print(node.Pos(), node.Key, blank) + if len(node.Specs) == 0 { + p.print(node.Lparen, token.LPAREN) + p.print(node.Rparen, token.RPAREN) + return + } + + if node.Lparen.IsValid() || len(node.Specs) > 1 { + p.print(node.Lparen, token.LPAREN) + p.print(indent, formfeed) + for i, s := range node.Specs { + if i > 0 { + p.print(newline) + if node.Key == ast.TYPE { + p.print(newline) + } + } + p.spec(s) + } + p.print(unindent, formfeed) + p.print(node.Rparen, token.RPAREN) + } else if len(node.Specs) > 0 { // one line declaration + p.spec(node.Specs[0]) + } +} + +// spec for importSpec or typeSpec +func (p *printer) spec(spec ast.Spec) { + switch x := spec.(type) { + case *ast.ImportSpec: + p.expr(x.Path) + p.print(x.End()) + + case *ast.TypeSpec: + p.expr(x.Name) + p.expr(x.Type) + p.print(x.End()) + + default: + panic("unreachable") + } +} + +func (p *printer) expr(node ast.Expr) { + p.print(node.Pos()) + switch x := node.(type) { + case *ast.BadExpr: + // todo: + case *ast.Ident: + p.print(x) + + case *ast.BasicLit: + p.print(x) + + case *ast.KeyValueExpr: + p.print(x.Key) + if x.Colon.IsValid() { + p.print(token.COLON) + } + p.print(blank, x.Value) + + case *ast.ParenExpr: + p.print(token.LPAREN, x.X, token.RPAREN) + + case *ast.ArrayType: + p.print(token.LBRACK) + p.expr(x.Elt) + p.print(token.RBRACK) + + case *ast.StructType: + p.print(ast.TYPE) + p.fieldList(x.Fields) + + case *ast.MapType: + p.print("map", token.LBRACK) + p.expr(x.Key) + p.print(token.RBRACK) + p.expr(x.Value) + + default: + panic("unreachable") + } +} + +func (p *printer) fieldList(fields *ast.FieldList) { + if fields == nil { + 
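+		// nothing to print: be defensive about malformed ASTs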
return + } + p.print(fields.Pos(), blank, fields.Lbrace, token.LBRACE, indent) + // TODO: has comment hasComments || + if len(fields.List) > 0 { + p.print(formfeed) + } + sep := vtab + if len(fields.List) == 1 { + sep = blank + } + for i, f := range fields.List { + if i > 0 { + p.print(newline) + } + for j, x := range f.Names { + if j > 0 { + p.print(x.Pos(), token.COMMA, blank) + } + p.expr(x) + } + p.print(sep) + p.print(f.Type) + if f.Tag != nil { + p.print(sep) + p.expr(f.Tag) + } + } + p.print(unindent, formfeed, fields.Rbrace, token.RBRACE) +} + +// ---------------------------------------------------------------------------- +// decl + +func (p *printer) declList(decls []ast.Decl) { + for _, d := range decls { + p.print(newline, newline) + p.decl(d) + } +} + +func (p *printer) decl(decl ast.Decl) { + switch d := decl.(type) { + case *ast.BadDecl: + p.print(d.Pos(), "BadDecl") + + case *ast.GenDecl: // just type decl + p.genDecl(d) + + case *ast.ServiceDecl: + p.serviceDecl(d) + + default: + panic("unreachable") + } +} + +// ---------------------------------------------------------------------------- +// service decl + +func (p *printer) serviceDecl(node *ast.ServiceDecl) { + if node.ServiceExt != nil { + p.serviceExtDecl(node.ServiceExt) + p.print(newline) + } + + p.serviceApiDecl(node.ServiceApi) +} + +func (p *printer) serviceExtDecl(node *ast.ServiceExtDecl) { + p.print(node.Pos(), ast.SERVEREXT, blank, token.LPAREN) + p.print(indent, formfeed) + for _, each := range node.Kvs { + //p.print(each.Pos()) + p.expr(each) + p.print(newline) + } + p.print(unindent, formfeed, token.RPAREN) +} + +func (p *printer) serviceApiDecl(node *ast.ServiceApiDecl) { + p.print(node.Pos(), ast.SERVICE, blank) + p.expr(node.Name) + p.print(blank, node.Lbrace, token.LBRACE) + p.print(indent, formfeed) + for i, x := range node.ServiceRoutes { + if i > 0 { + p.print(newline, newline) + } + p.serviceRouteDecl(x) + } + p.print(unindent, formfeed) + p.print(token.RBRACE) +} + +func (p *printer) serviceRouteDecl(node *ast.ServiceRouteDecl) { + p.print(node.TokPos) + if node.AtDoc != nil { + p.expr(node.AtDoc) + p.print(newline) + } + if node.AtHandler != nil { + p.expr(node.AtHandler) + p.print(newline) + } + p.route(node.Route) +} + +func (p *printer) route(node *ast.Route) { + p.print(node.Method, blank, node.Path) + if node.Req != nil { + p.print(blank) + p.expr(node.Req) + } + if node.Resp != nil { + p.print(blank) + p.print(ast.RouteReturns, blank) + p.expr(node.Resp) + } +} diff --git a/printer/printer.go b/printer/printer.go new file mode 100644 index 0000000..bf17e61 --- /dev/null +++ b/printer/printer.go @@ -0,0 +1,1392 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package printer implements printing of AST nodes. +package printer + +import ( + "fmt" + "io" + "os" + "strings" + "text/tabwriter" + "unicode" + + "github.com/zeromicro/zero-api/ast" + "github.com/zeromicro/zero-api/token" +) + +const ( + maxNewlines = 2 // max. number of newlines between source text + debug = false // enable for debugging + infinity = 1 << 30 +) + +type whiteSpace byte + +const ( + ignore = whiteSpace(0) + blank = whiteSpace(' ') + vtab = whiteSpace('\v') + newline = whiteSpace('\n') + formfeed = whiteSpace('\f') + indent = whiteSpace('>') + unindent = whiteSpace('<') +) + +// A pmode value represents the current printer mode. 
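+// Modes are bit flags and may be combined with "|"; the zero value means
+// no special mode is active.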
+type pmode int + +const ( + noExtraBlank pmode = 1 << iota // disables extra blank after /*-style comment + noExtraLinebreak // disables extra line break after /*-style comment +) + +type commentInfo struct { + cindex int // current comment index + comment *ast.CommentGroup // = printer.comments[cindex]; or nil + commentOffset int // = printer.posFor(printer.comments[cindex].List[0].Pos()).Offset; or infinity + commentNewline bool // true if the comment group contains newlines +} + +type printer struct { + // Configuration (does not change after initialization) + Config + fset *token.FileSet + + // Current state + output []byte // raw printer result + indent int // current indentation + level int // level == 0: outside composite literal; level > 0: inside composite literal + mode pmode // current printer mode + endAlignment bool // if set, terminate alignment immediately + impliedSemi bool // if set, a linebreak implies a semicolon + lastTok token.Token // last token printed (token.ILLEGAL if it's whitespace) + prevOpen token.Token // previous non-brace "open" token (, [, or token.ILLEGAL + wsbuf []whiteSpace // delayed white space + + // Positions + // The out position differs from the pos position when the result + // formatting differs from the source formatting (in the amount of + // white space). If there's a difference and SourcePos is set in + // ConfigMode, //line directives are used in the output to restore + // original source positions for a reader. + pos token.Position // current position in AST (source) space + out token.Position // current position in output space + last token.Position // value of pos after calling writeString + linePtr *int // if set, record out.Line for the next token in *linePtr + + // The list of all source comments, in order of appearance. + comments []*ast.CommentGroup // may be nil + useNodeComments bool // if not set, ignore lead and line comments of nodes + + // Information about p.comments[p.cindex]; set up by nextComment. + commentInfo + + // Cache of already computed node sizes. + nodeSizes map[ast.Node]int + + // Cache of most recently computed line position. + cachedPos token.Pos + cachedLine int // line corresponding to cachedPos +} + +func (p *printer) init(cfg *Config, fset *token.FileSet, nodeSizes map[ast.Node]int) { + p.Config = *cfg + p.fset = fset + p.pos = token.Position{Line: 1, Column: 1} + p.out = token.Position{Line: 1, Column: 1} + p.wsbuf = make([]whiteSpace, 0, 16) // whitespace sequences are short + p.nodeSizes = nodeSizes + p.cachedPos = -1 +} + +func (p *printer) internalError(msg ...interface{}) { + if debug { + fmt.Print(p.pos.String() + ": ") + fmt.Println(msg...) + panic("go/printer") + } +} + +// commentsHaveNewline reports whether a list of comments belonging to +// an *ast.CommentGroup contains newlines. Because the position information +// may only be partially correct, we also have to read the comment text. 
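+// For example, a group that contains a //-style comment always implies a
+// newline, regardless of what the recorded positions say.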
+func (p *printer) commentsHaveNewline(list []*ast.Comment) bool { + // len(list) > 0 + line := p.lineFor(list[0].Pos()) + for i, c := range list { + if i > 0 && p.lineFor(list[i].Pos()) != line { + // not all comments on the same line + return true + } + if t := c.Text; len(t) >= 2 && (t[1] == '/' || strings.Contains(t, "\n")) { + return true + } + } + _ = line + return false +} + +func (p *printer) nextComment() { + for p.cindex < len(p.comments) { + c := p.comments[p.cindex] + p.cindex++ + if list := c.List; len(list) > 0 { + p.comment = c + p.commentOffset = p.posFor(list[0].Pos()).Offset + p.commentNewline = p.commentsHaveNewline(list) + return + } + // we should not reach here (correct ASTs don't have empty + // ast.CommentGroup nodes), but be conservative and try again + } + // no more comments + p.commentOffset = infinity +} + +// commentBefore reports whether the current comment group occurs +// before the next position in the source code and printing it does +// not introduce implicit semicolons. +// +func (p *printer) commentBefore(next token.Position) bool { + return p.commentOffset < next.Offset && (!p.impliedSemi || !p.commentNewline) +} + +// commentSizeBefore returns the estimated size of the +// comments on the same line before the next position. +// +func (p *printer) commentSizeBefore(next token.Position) int { + // save/restore current p.commentInfo (p.nextComment() modifies it) + defer func(info commentInfo) { + p.commentInfo = info + }(p.commentInfo) + + size := 0 + for p.commentBefore(next) { + for _, c := range p.comment.List { + size += len(c.Text) + } + p.nextComment() + } + return size +} + +// recordLine records the output line number for the next non-whitespace +// token in *linePtr. It is used to compute an accurate line number for a +// formatted construct, independent of pending (not yet emitted) whitespace +// or comments. +// +func (p *printer) recordLine(linePtr *int) { + p.linePtr = linePtr +} + +// linesFrom returns the number of output lines between the current +// output line and the line argument, ignoring any pending (not yet +// emitted) whitespace or comments. It is used to compute an accurate +// size (in number of lines) for a formatted construct. +// +func (p *printer) linesFrom(line int) int { + return p.out.Line - line +} + +func (p *printer) posFor(pos token.Pos) token.Position { + // not used frequently enough to cache entire token.Position + return p.fset.PositionFor(pos, false /* absolute position */) +} + +func (p *printer) lineFor(pos token.Pos) int { + if pos != p.cachedPos { + p.cachedPos = pos + p.cachedLine = p.fset.PositionFor(pos, false /* absolute position */).Line + } + return p.cachedLine +} + +// writeLineDirective writes a //line directive if necessary. +func (p *printer) writeLineDirective(pos token.Position) { + if pos.IsValid() && (p.out.Line != pos.Line || p.out.Filename != pos.Filename) { + p.output = append(p.output, tabwriter.Escape) // protect '\n' in //line from tabwriter interpretation + p.output = append(p.output, fmt.Sprintf("//line %s:%d\n", pos.Filename, pos.Line)...) + p.output = append(p.output, tabwriter.Escape) + // p.out must match the //line directive + p.out.Filename = pos.Filename + p.out.Line = pos.Line + } +} + +// writeIndent writes indentation. 
+func (p *printer) writeIndent() { + // use "hard" htabs - indentation columns + // must not be discarded by the tabwriter + n := p.Config.Indent + p.indent // include base indentation + for i := 0; i < n; i++ { + p.output = append(p.output, '\t') + } + + // update positions + p.pos.Offset += n + p.pos.Column += n + p.out.Column += n +} + +// writeByte writes ch n times to p.output and updates p.pos. +// Only used to write formatting (white space) characters. +func (p *printer) writeByte(ch byte, n int) { + if p.endAlignment { + // Ignore any alignment control character; + // and at the end of the line, break with + // a formfeed to indicate termination of + // existing columns. + switch ch { + case '\t', '\v': + ch = ' ' + case '\n', '\f': + ch = '\f' + p.endAlignment = false + } + } + + if p.out.Column == 1 { + // no need to write line directives before white space + p.writeIndent() + } + + for i := 0; i < n; i++ { + p.output = append(p.output, ch) + } + + // update positions + p.pos.Offset += n + if ch == '\n' || ch == '\f' { + p.pos.Line += n + p.out.Line += n + p.pos.Column = 1 + p.out.Column = 1 + return + } + p.pos.Column += n + p.out.Column += n +} + +// writeString writes the string s to p.output and updates p.pos, p.out, +// and p.last. If isLit is set, s is escaped w/ tabwriter.Escape characters +// to protect s from being interpreted by the tabwriter. +// +// Note: writeString is only used to write Go tokens, literals, and +// comments, all of which must be written literally. Thus, it is correct +// to always set isLit = true. However, setting it explicitly only when +// needed (i.e., when we don't know that s contains no tabs or line breaks) +// avoids processing extra escape characters and reduces run time of the +// printer benchmark by up to 10%. +// +func (p *printer) writeString(pos token.Position, s string, isLit bool) { + if p.out.Column == 1 { + if p.Config.Mode&SourcePos != 0 { + p.writeLineDirective(pos) + } + p.writeIndent() + } + + if pos.IsValid() { + // update p.pos (if pos is invalid, continue with existing p.pos) + // Note: Must do this after handling line beginnings because + // writeIndent updates p.pos if there's indentation, but p.pos + // is the position of s. + p.pos = pos + } + + if isLit { + // Protect s such that is passes through the tabwriter + // unchanged. Note that valid Go programs cannot contain + // tabwriter.Escape bytes since they do not appear in legal + // UTF-8 sequences. + p.output = append(p.output, tabwriter.Escape) + } + + if debug { + p.output = append(p.output, fmt.Sprintf("/*%s*/", pos)...) // do not update p.pos! + } + p.output = append(p.output, s...) + + // update positions + nlines := 0 + var li int // index of last newline; valid if nlines > 0 + for i := 0; i < len(s); i++ { + // Raw string literals may contain any character except back quote (`). + if ch := s[i]; ch == '\n' || ch == '\f' { + // account for line break + nlines++ + li = i + // A line break inside a literal will break whatever column + // formatting is in place; ignore any further alignment through + // the end of the line. + p.endAlignment = true + } + } + p.pos.Offset += len(s) + if nlines > 0 { + p.pos.Line += nlines + p.out.Line += nlines + c := len(s) - li + p.pos.Column = c + p.out.Column = c + } else { + p.pos.Column += len(s) + p.out.Column += len(s) + } + + if isLit { + p.output = append(p.output, tabwriter.Escape) + } + + p.last = p.pos +} + +// writeCommentPrefix writes the whitespace before a comment. 
+// If there is any pending whitespace, it consumes as much of +// it as is likely to help position the comment nicely. +// pos is the comment position, next the position of the item +// after all pending comments, prev is the previous comment in +// a group of comments (or nil), and tok is the next token. +// +func (p *printer) writeCommentPrefix(pos, next token.Position, prev *ast.Comment, tok token.Token) { + if len(p.output) == 0 { + // the comment is the first item to be printed - don't write any whitespace + return + } + + if pos.IsValid() && pos.Filename != p.last.Filename { + // comment in a different file - separate with newlines + p.writeByte('\f', maxNewlines) + return + } + + if pos.Line == p.last.Line && (prev == nil || prev.Text[1] != '/') { + // comment on the same line as last item: + // separate with at least one separator + hasSep := false + if prev == nil { + // first comment of a comment group + j := 0 + for i, ch := range p.wsbuf { + switch ch { + case blank: + // ignore any blanks before a comment + p.wsbuf[i] = ignore + continue + case vtab: + // respect existing tabs - important + // for proper formatting of commented structs + hasSep = true + continue + case indent: + // apply pending indentation + continue + } + j = i + break + } + p.writeWhitespace(j) + } + // make sure there is at least one separator + if !hasSep { + sep := byte('\t') + if pos.Line == next.Line { + // next item is on the same line as the comment + // (which must be a /*-style comment): separate + // with a blank instead of a tab + sep = ' ' + } + p.writeByte(sep, 1) + } + + } else { + // comment on a different line: + // separate with at least one line break + droppedLinebreak := false + j := 0 + for i, ch := range p.wsbuf { + switch ch { + case blank, vtab: + // ignore any horizontal whitespace before line breaks + p.wsbuf[i] = ignore + continue + case indent: + // apply pending indentation + continue + case unindent: + // if this is not the last unindent, apply it + // as it is (likely) belonging to the last + // construct (e.g., a multi-line expression list) + // and is not part of closing a block + if i+1 < len(p.wsbuf) && p.wsbuf[i+1] == unindent { + continue + } + // if the next token is not a closing }, apply the unindent + // if it appears that the comment is aligned with the + // token; otherwise assume the unindent is part of a + // closing block and stop (this scenario appears with + // comments before a case label where the comments + // apply to the next case instead of the current one) + if tok != token.RBRACE && pos.Column == next.Column { + continue + } + case newline, formfeed: + p.wsbuf[i] = ignore + droppedLinebreak = prev == nil // record only if first comment of a group + } + j = i + break + } + p.writeWhitespace(j) + + // determine number of linebreaks before the comment + n := 0 + if pos.IsValid() && p.last.IsValid() { + n = pos.Line - p.last.Line + if n < 0 { // should never happen + n = 0 + } + } + + // at the package scope level only (p.indent == 0), + // add an extra newline if we dropped one before: + // this preserves a blank line before documentation + // comments at the package scope level (issue 2570) + if p.indent == 0 && droppedLinebreak { + n++ + } + + // make sure there is at least one line break + // if the previous comment was a line comment + if n == 0 && prev != nil && prev.Text[1] == '/' { + n = 1 + } + + if n > 0 { + // use formfeeds to break columns before a comment; + // this is analogous to using formfeeds to separate + // individual lines of /*-style 
comments
+			p.writeByte('\f', nlimit(n))
+		}
+	}
+}
+
+// isBlank reports whether s contains only white space
+// (only tabs and blanks can appear in the printer's context).
+//
+func isBlank(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if s[i] > ' ' {
+			return false
+		}
+	}
+	return true
+}
+
+// commonPrefix returns the common prefix of a and b.
+func commonPrefix(a, b string) string {
+	i := 0
+	for i < len(a) && i < len(b) && a[i] == b[i] && (a[i] <= ' ' || a[i] == '*') {
+		i++
+	}
+	return a[0:i]
+}
+
+// trimRight returns s with trailing whitespace removed.
+func trimRight(s string) string {
+	return strings.TrimRightFunc(s, unicode.IsSpace)
+}
+
+// stripCommonPrefix removes a common prefix from /*-style comment lines (unless no
+// comment line is indented, all but the first line have some form of space prefix).
+// The prefix is computed using heuristics such that it is likely that the comment
+// contents are nicely laid out after re-printing each line using the printer's
+// current indentation.
+//
+func stripCommonPrefix(lines []string) {
+	if len(lines) <= 1 {
+		return // at most one line - nothing to do
+	}
+	// len(lines) > 1
+
+	// The heuristic in this function tries to handle a few
+	// common patterns of /*-style comments: Comments where
+	// the opening /* and closing */ are aligned and the
+	// rest of the comment text is aligned and indented with
+	// blanks or tabs, cases with a vertical "line of stars"
+	// on the left, and cases where the closing */ is on the
+	// same line as the last comment text.
+
+	// Compute maximum common white prefix of all but the first,
+	// last, and blank lines, and replace blank lines with empty
+	// lines (the first line starts with /* and has no prefix).
+	// In cases where only the first and last lines are not blank,
+	// such as two-line comments, or comments where all inner lines
+	// are blank, consider the last line for the prefix computation
+	// since otherwise the prefix would be empty.
+	//
+	// Note that the first and last line are never empty (they
+	// contain the opening /* and closing */ respectively) and
+	// thus they can be ignored by the blank line check.
+	prefix := ""
+	prefixSet := false
+	if len(lines) > 2 {
+		for i, line := range lines[1 : len(lines)-1] {
+			if isBlank(line) {
+				lines[1+i] = "" // range starts with lines[1]
+			} else {
+				if !prefixSet {
+					prefix = line
+					prefixSet = true
+				}
+				prefix = commonPrefix(prefix, line)
+			}
+
+		}
+	}
+	// If we don't have a prefix yet, consider the last line.
+	if !prefixSet {
+		line := lines[len(lines)-1]
+		prefix = commonPrefix(line, line)
+	}
+
+	/*
+	 * Check for vertical "line of stars" and correct prefix accordingly.
+	 */
+	lineOfStars := false
+	if p, _, ok := strings.Cut(prefix, "*"); ok {
+		// remove trailing blank from prefix so stars remain aligned
+		prefix = strings.TrimSuffix(p, " ")
+		lineOfStars = true
+	} else {
+		// No line of stars present.
+		// Determine the white space on the first line after the /*
+		// and before the beginning of the comment text, assume two
+		// blanks instead of the /* unless the first character after
+		// the /* is a tab. If the first comment line is empty but
+		// for the opening /*, assume up to 3 blanks or a tab. This
+		// whitespace may be found as suffix in the common prefix.
+ first := lines[0] + if isBlank(first[2:]) { + // no comment text on the first line: + // reduce prefix by up to 3 blanks or a tab + // if present - this keeps comment text indented + // relative to the /* and */'s if it was indented + // in the first place + i := len(prefix) + for n := 0; n < 3 && i > 0 && prefix[i-1] == ' '; n++ { + i-- + } + if i == len(prefix) && i > 0 && prefix[i-1] == '\t' { + i-- + } + prefix = prefix[0:i] + } else { + // comment text on the first line + suffix := make([]byte, len(first)) + n := 2 // start after opening /* + for n < len(first) && first[n] <= ' ' { + suffix[n] = first[n] + n++ + } + if n > 2 && suffix[2] == '\t' { + // assume the '\t' compensates for the /* + suffix = suffix[2:n] + } else { + // otherwise assume two blanks + suffix[0], suffix[1] = ' ', ' ' + suffix = suffix[0:n] + } + // Shorten the computed common prefix by the length of + // suffix, if it is found as suffix of the prefix. + prefix = strings.TrimSuffix(prefix, string(suffix)) + } + } + + // Handle last line: If it only contains a closing */, align it + // with the opening /*, otherwise align the text with the other + // lines. + last := lines[len(lines)-1] + closing := "*/" + before, _, _ := strings.Cut(last, closing) // closing always present + if isBlank(before) { + // last line only contains closing */ + if lineOfStars { + closing = " */" // add blank to align final star + } + lines[len(lines)-1] = prefix + closing + } else { + // last line contains more comment text - assume + // it is aligned like the other lines and include + // in prefix computation + prefix = commonPrefix(prefix, last) + } + + // Remove the common prefix from all but the first and empty lines. + for i, line := range lines { + if i > 0 && line != "" { + lines[i] = line[len(prefix):] + } + } +} + +func (p *printer) writeComment(comment *ast.Comment) { + text := comment.Text + pos := p.posFor(comment.Pos()) + + const linePrefix = "//line " + if strings.HasPrefix(text, linePrefix) && (!pos.IsValid() || pos.Column == 1) { + // Possibly a //-style line directive. + // Suspend indentation temporarily to keep line directive valid. + defer func(indent int) { p.indent = indent }(p.indent) + p.indent = 0 + } + + // shortcut common case of //-style comments + if text[1] == '/' { + //if constraint.IsGoBuild(text) { + // p.goBuild = append(p.goBuild, len(p.output)) + //} else if constraint.IsPlusBuild(text) { + // p.plusBuild = append(p.plusBuild, len(p.output)) + //} + p.writeString(pos, trimRight(text), true) + return + } + + // for /*-style comments, print line by line and let the + // write function take care of the proper indentation + lines := strings.Split(text, "\n") + + // The comment started in the first column but is going + // to be indented. For an idempotent result, add indentation + // to all lines such that they look like they were indented + // before - this will make sure the common prefix computation + // is the same independent of how many times formatting is + // applied (was issue 1835). 
+
+	if pos.IsValid() && pos.Column == 1 && p.indent > 0 {
+		for i, line := range lines[1:] {
+			lines[1+i] = " " + line
+		}
+	}
+
+	stripCommonPrefix(lines)
+
+	// write comment lines, separated by formfeed,
+	// without a line break after the last line
+	for i, line := range lines {
+		if i > 0 {
+			p.writeByte('\f', 1)
+			pos = p.pos
+		}
+		if len(line) > 0 {
+			p.writeString(pos, trimRight(line), true)
+		}
+	}
+}
+
+// writeCommentSuffix writes a line break after a comment if indicated
+// and processes any leftover indentation information. If a line break
+// is needed, the kind of break (newline vs formfeed) depends on the
+// pending whitespace. The writeCommentSuffix result indicates if a
+// newline was written or if a formfeed was dropped from the whitespace
+// buffer.
+//
+func (p *printer) writeCommentSuffix(needsLinebreak bool) (wroteNewline, droppedFF bool) {
+	for i, ch := range p.wsbuf {
+		switch ch {
+		case blank, vtab:
+			// ignore trailing whitespace
+			p.wsbuf[i] = ignore
+		case indent, unindent:
+			// don't lose indentation information
+		case newline, formfeed:
+			// if we need a line break, keep exactly one
+			// but remember if we dropped any formfeeds
+			if needsLinebreak {
+				needsLinebreak = false
+				wroteNewline = true
+			} else {
+				if ch == formfeed {
+					droppedFF = true
+				}
+				p.wsbuf[i] = ignore
+			}
+		}
+	}
+	p.writeWhitespace(len(p.wsbuf))
+
+	// make sure we have a line break
+	if needsLinebreak {
+		p.writeByte('\n', 1)
+		wroteNewline = true
+	}
+
+	return
+}
+
+// containsLinebreak reports whether the whitespace buffer contains any line breaks.
+func (p *printer) containsLinebreak() bool {
+	for _, ch := range p.wsbuf {
+		if ch == newline || ch == formfeed {
+			return true
+		}
+	}
+	return false
+}
+
+// intersperseComments consumes all comments that appear before the next token
+// tok and prints them together with the buffered whitespace (i.e., the whitespace
+// that needs to be written before the next token). A heuristic is used to mix
+// the comments and whitespace. The intersperseComments result indicates if a
+// newline was written or if a formfeed was dropped from the whitespace buffer.
+//
+func (p *printer) intersperseComments(next token.Position, tok token.Token) (wroteNewline, droppedFF bool) {
+	var last *ast.Comment
+	for p.commentBefore(next) {
+		for _, c := range p.comment.List {
+			p.writeCommentPrefix(p.posFor(c.Pos()), next, last, tok)
+			p.writeComment(c)
+			last = c
+		}
+		p.nextComment()
+	}
+
+	if last != nil {
+		// If the last comment is a /*-style comment and the next item
+		// follows on the same line but is not a comma, and not a "closing"
+		// token immediately following its corresponding "opening" token,
+		// add an extra separator unless explicitly disabled. Use a blank
+		// as separator unless we have pending linebreaks, they are not
+		// disabled, and we are outside a composite literal, in which case
+		// we want a linebreak (issue 15137).
+		// TODO(gri) This has become overly complicated. We should be able
+		// to track whether we're inside an expression or statement and
+		// use that information to decide more directly.
+
+		needsLinebreak := false
+		if p.mode&noExtraBlank == 0 &&
+			last.Text[1] == '*' && p.lineFor(last.Pos()) == next.Line &&
+			tok != token.COMMA &&
+			(tok != token.RPAREN || p.prevOpen == token.LPAREN) &&
+			(tok != token.RBRACK || p.prevOpen == token.LBRACK) {
+			if p.containsLinebreak() && p.mode&noExtraLinebreak == 0 && p.level == 0 {
+				needsLinebreak = true
+			} else {
+				p.writeByte(' ', 1)
+			}
+		}
+		// Ensure that there is a line break after a //-style comment,
+		// before EOF, and before a closing '}' unless explicitly disabled.
+		if last.Text[1] == '/' ||
+			tok == token.EOF ||
+			tok == token.RBRACE && p.mode&noExtraLinebreak == 0 {
+			needsLinebreak = true
+		}
+		return p.writeCommentSuffix(needsLinebreak)
+	}
+
+	// no comment was written - we should never reach here since
+	// intersperseComments should not be called in that case
+	p.internalError("intersperseComments called without pending comments")
+	return
+}
+
+// writeWhitespace writes the first n whitespace entries.
+func (p *printer) writeWhitespace(n int) {
+	// write entries
+	for i := 0; i < n; i++ {
+		switch ch := p.wsbuf[i]; ch {
+		case ignore:
+			// ignore!
+		case indent:
+			p.indent++
+		case unindent:
+			p.indent--
+			if p.indent < 0 {
+				p.internalError("negative indentation:", p.indent)
+				p.indent = 0
+			}
+		case newline, formfeed:
+			// A line break immediately followed by a "correcting"
+			// unindent is swapped with the unindent - this permits
+			// proper label positioning. If a comment is between
+			// the line break and the label, the unindent is not
+			// part of the comment whitespace prefix and the comment
+			// will be positioned correctly indented.
+			if i+1 < n && p.wsbuf[i+1] == unindent {
+				// Use a formfeed to terminate the current section.
+				// Otherwise, a long label name on the next line leading
+				// to a wide column may increase the indentation column
+				// of lines before the label; effectively leading to wrong
+				// indentation.
+				p.wsbuf[i], p.wsbuf[i+1] = unindent, formfeed
+				i-- // do it again
+				continue
+			}
+			fallthrough
+		default:
+			p.writeByte(byte(ch), 1)
+		}
+	}
+
+	// shift remaining entries down
+	l := copy(p.wsbuf, p.wsbuf[n:])
+	p.wsbuf = p.wsbuf[:l]
+}
+
+// ----------------------------------------------------------------------------
+// Printing interface
+
+// nlimit limits n to maxNewlines.
+func nlimit(n int) int {
+	if n > maxNewlines {
+		n = maxNewlines
+	}
+	return n
+}
+
+// mayCombine reports whether printing the byte next directly after the
+// token prev could merge the two into a different token sequence. The Go
+// token pairs that would combine are kept below for reference; none of
+// them currently occur in zero-api output, so the check is disabled and
+// always reports false.
+func mayCombine(prev token.Token, next byte) (b bool) {
+	//switch prev {
+	//case token.INT:
+	//	b = next == '.' // 1.
+	//case token.ADD:
+	//	b = next == '+' // ++
+	//case token.SUB:
+	//	b = next == '-' // --
+	//case token.QUO:
+	//	b = next == '*' // /*
+	//case token.LSS:
+	//	b = next == '-' || next == '<' // <- or <<
+	//case token.AND:
+	//	b = next == '&' || next == '^' // && or &^
+	//}
+	//return
+	return false
+}
+
+// print prints a list of "items" (roughly corresponding to syntactic
+// tokens, but also including whitespace and formatting information).
+// It is the only print function that should be called directly from
+// any of the AST printing functions in nodes.go.
+//
+// Whitespace is accumulated until a non-whitespace token appears. Any
+// comments that need to appear before that token are printed first,
+// taking into account the amount and structure of any pending white-
+// space for best comment placement. Then, any leftover whitespace is
+// printed, followed by the actual token.
+// +func (p *printer) print(args ...interface{}) { + for _, arg := range args { + // information about the current arg + var data string + var isLit bool + var impliedSemi bool // value for p.impliedSemi after this arg + + // record previous opening token, if any + switch p.lastTok { + case token.ILLEGAL: + // ignore (white space) + case token.LPAREN, token.LBRACK: + p.prevOpen = p.lastTok + default: + // other tokens followed any opening token + p.prevOpen = token.ILLEGAL + } + + switch x := arg.(type) { + case pmode: + // toggle printer mode + p.mode ^= x + continue + + case whiteSpace: + if x == ignore { + // don't add ignore's to the buffer; they + // may screw up "correcting" unindents (see + // LabeledStmt) + continue + } + i := len(p.wsbuf) + if i == cap(p.wsbuf) { + // Whitespace sequences are very short so this should + // never happen. Handle gracefully (but possibly with + // bad comment placement) if it does happen. + p.writeWhitespace(i) + i = 0 + } + p.wsbuf = p.wsbuf[0 : i+1] + p.wsbuf[i] = x + if x == newline || x == formfeed { + // newlines affect the current state (p.impliedSemi) + // and not the state after printing arg (impliedSemi) + // because comments can be interspersed before the arg + // in this case + p.impliedSemi = false + } + p.lastTok = token.ILLEGAL + continue + + case *ast.Ident: + data = x.Name + impliedSemi = true + p.lastTok = token.IDENT + + case *ast.BasicLit: + data = x.Value + isLit = true + impliedSemi = true + p.lastTok = x.Kind + + case token.Token: + s := x.String() + if mayCombine(p.lastTok, s[0]) { + // the previous and the current token must be + // separated by a blank otherwise they combine + // into a different incorrect token sequence + // (except for token.INT followed by a '.' this + // should never happen because it is taken care + // of via binary expression formatting) + if len(p.wsbuf) != 0 { + p.internalError("whitespace buffer not empty") + } + p.wsbuf = p.wsbuf[0:1] + p.wsbuf[0] = ' ' + } + data = s + // some keywords followed by a newline imply a semicolon + switch x { + //case token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN, token.INC, token.DEC, + case token.RPAREN, token.RBRACK, token.RBRACE: + impliedSemi = true + } + p.lastTok = x + + case token.Pos: + if x.IsValid() { + p.pos = p.posFor(x) // accurate position of next item + } + continue + + case string: + // incorrect AST - print error message + data = x + isLit = true + impliedSemi = true + p.lastTok = token.STRING + + case ast.Keyword: + data = string(x) + impliedSemi = true + p.lastTok = token.IDENT + + default: + fmt.Fprintf(os.Stderr, "print: unsupported argument %v (%T)\n", arg, arg) + panic("go/printer type") + } + // data != "" + + next := p.pos // estimated/accurate position of next item + wroteNewline, droppedFF := p.flush(next, p.lastTok) + + // intersperse extra newlines if present in the source and + // if they don't cause extra semicolons (don't do this in + // flush as it will cause extra newlines at the end of a file) + if !p.impliedSemi { + n := nlimit(next.Line - p.pos.Line) + // don't exceed maxNewlines if we already wrote one + if wroteNewline && n == maxNewlines { + n = maxNewlines - 1 + } + if n > 0 { + ch := byte('\n') + if droppedFF { + ch = '\f' // use formfeed since we dropped one before + } + p.writeByte(ch, n) + impliedSemi = false + } + } + + // the next token starts now - record its line number if requested + if p.linePtr != nil { + *p.linePtr = p.out.Line + p.linePtr = nil + } + + p.writeString(next, data, isLit) + p.impliedSemi 
= impliedSemi
+	}
+}
+
+// flush prints any pending comments and whitespace occurring textually
+// before the position of the next token tok. The flush result indicates
+// if a newline was written or if a formfeed was dropped from the whitespace
+// buffer.
+//
+func (p *printer) flush(next token.Position, tok token.Token) (wroteNewline, droppedFF bool) {
+	if p.commentBefore(next) {
+		// if there are comments before the next item, intersperse them
+		wroteNewline, droppedFF = p.intersperseComments(next, tok)
+	} else {
+		// otherwise, write any leftover whitespace
+		p.writeWhitespace(len(p.wsbuf))
+	}
+	return
+}
+
+// getDoc returns the ast.CommentGroup associated with n, if any.
+func getDoc(n ast.Node) *ast.CommentGroup {
+	switch n := n.(type) {
+	case *ast.Field:
+		return n.Doc
+	case *ast.ImportSpec:
+		return n.Doc
+	//case *ast.ValueSpec:
+	//	return n.Doc
+	case *ast.TypeSpec:
+		return n.Doc
+	case *ast.GenDecl:
+		return n.Doc
+	//case *ast.FuncDecl:
+	//	return n.Doc
+	case *ast.File:
+		return n.Doc
+	}
+	return nil
+}
+
+// getLastComment returns the last comment group associated with n, if any.
+func getLastComment(n ast.Node) *ast.CommentGroup {
+	switch n := n.(type) {
+	case *ast.Field:
+		return n.Comment
+	case *ast.ImportSpec:
+		return n.Comment
+	//case *ast.ValueSpec:
+	//	return n.Comment
+	case *ast.TypeSpec:
+		return n.Comment
+	case *ast.GenDecl:
+		if len(n.Specs) > 0 {
+			return getLastComment(n.Specs[len(n.Specs)-1])
+		}
+	case *ast.File:
+		if len(n.Comments) > 0 {
+			return n.Comments[len(n.Comments)-1]
+		}
+	}
+	return nil
+}
+
+func (p *printer) printNode(node interface{}) error {
+	// unpack *CommentedNode, if any
+	var comments []*ast.CommentGroup
+	if cnode, ok := node.(*CommentedNode); ok {
+		node = cnode.Node
+		comments = cnode.Comments
+	}
+
+	if comments != nil {
+		// commented node - restrict comment list to relevant range
+		n, ok := node.(ast.Node)
+		if !ok {
+			goto unsupported
+		}
+		beg := n.Pos()
+		end := n.End()
+		// if the node has associated documentation,
+		// include that commentgroup in the range
+		// (the comment list is sorted in the order
+		// of the comment appearance in the source code)
+		if doc := getDoc(n); doc != nil {
+			beg = doc.Pos()
+		}
+		if com := getLastComment(n); com != nil {
+			if e := com.End(); e > end {
+				end = e
+			}
+		}
+		// token.Pos values are global offsets, we can
+		// compare them directly
+		i := 0
+		for i < len(comments) && comments[i].End() < beg {
+			i++
+		}
+		j := i
+		for j < len(comments) && comments[j].Pos() < end {
+			j++
+		}
+		if i < j {
+			p.comments = comments[i:j]
+		}
+	} else if n, ok := node.(*ast.File); ok {
+		// use ast.File comments, if any
+		p.comments = n.Comments
+	}
+
+	// if there are no comments, use node comments
+	p.useNodeComments = p.comments == nil
+
+	// get comments ready for use
+	p.nextComment()
+
+	p.print(pmode(0))
+
+	// format node
+	switch n := node.(type) {
+	case ast.Expr:
+		p.expr(n)
+	//case ast.Stmt:
+	//	// A labeled statement will un-indent to position the label.
+	//	// Set p.indent to 1 so we don't get indent "underflow".
+	//	if _, ok := n.(*ast.LabeledStmt); ok {
+	//		p.indent = 1
+	//	}
+	//	p.stmt(n, false)
+	//case ast.Decl:
+	//	p.decl(n)
+	//case ast.Spec:
+	//	p.spec(n)
+	//case []ast.Stmt:
+	//	// A labeled statement will un-indent to position the label.
+	//	// Set p.indent to 1 so we don't get indent "underflow".
+ // for _, s := range n { + // if _, ok := s.(*ast.LabeledStmt); ok { + // p.indent = 1 + // } + // } + // p.stmtList(n, 0, false) + //case []ast.Decl: + // p.declList(n) + case *ast.File: + p.file(n) + default: + goto unsupported + } + + return nil + +unsupported: + return fmt.Errorf("go/printer: unsupported node type %T", node) +} + +// ---------------------------------------------------------------------------- +// Trimmer + +// A trimmer is an io.Writer filter for stripping tabwriter.Escape +// characters, trailing blanks and tabs, and for converting formfeed +// and vtab characters into newlines and htabs (in case no tabwriter +// is used). Text bracketed by tabwriter.Escape characters is passed +// through unchanged. +// +type trimmer struct { + output io.Writer + state int + space []byte +} + +// trimmer is implemented as a state machine. +// It can be in one of the following states: +const ( + inSpace = iota // inside space + inEscape // inside text bracketed by tabwriter.Escapes + inText // inside text +) + +func (p *trimmer) resetSpace() { + p.state = inSpace + p.space = p.space[0:0] +} + +// Design note: It is tempting to eliminate extra blanks occurring in +// whitespace in this function as it could simplify some +// of the blanks logic in the node printing functions. +// However, this would mess up any formatting done by +// the tabwriter. + +var aNewline = []byte("\n") + +func (p *trimmer) Write(data []byte) (n int, err error) { + // invariants: + // p.state == inSpace: + // p.space is unwritten + // p.state == inEscape, inText: + // data[m:n] is unwritten + m := 0 + var b byte + for n, b = range data { + if b == '\v' { + b = '\t' // convert to htab + } + switch p.state { + case inSpace: + switch b { + case '\t', ' ': + p.space = append(p.space, b) + case '\n', '\f': + p.resetSpace() // discard trailing space + _, err = p.output.Write(aNewline) + case tabwriter.Escape: + _, err = p.output.Write(p.space) + p.state = inEscape + m = n + 1 // +1: skip tabwriter.Escape + default: + _, err = p.output.Write(p.space) + p.state = inText + m = n + } + case inEscape: + if b == tabwriter.Escape { + _, err = p.output.Write(data[m:n]) + p.resetSpace() + } + case inText: + switch b { + case '\t', ' ': + _, err = p.output.Write(data[m:n]) + p.resetSpace() + p.space = append(p.space, b) + case '\n', '\f': + _, err = p.output.Write(data[m:n]) + p.resetSpace() + if err == nil { + _, err = p.output.Write(aNewline) + } + case tabwriter.Escape: + _, err = p.output.Write(data[m:n]) + p.state = inEscape + m = n + 1 // +1: skip tabwriter.Escape + } + default: + panic("unreachable") + } + if err != nil { + return + } + } + n = len(data) + + switch p.state { + case inEscape, inText: + _, err = p.output.Write(data[m:n]) + p.resetSpace() + } + + return +} + +// ---------------------------------------------------------------------------- +// Public interface + +// A Mode value is a set of flags (or 0). They control printing. +type Mode uint + +const ( + RawFormat Mode = 1 << iota // do not use a tabwriter; if set, UseSpaces is ignored + TabIndent // use tabs for indentation independent of UseSpaces + UseSpaces // use spaces instead of tabs for alignment + SourcePos // emit //line directives to preserve original source positions +) + +// The mode below is not included in printer's public API because +// editing code text is deemed out of scope. 
Because this mode is +// unexported, it's also possible to modify or remove it based on +// the evolving needs of go/format and cmd/gofmt without breaking +// users. See discussion in CL 240683. +const ( + // normalizeNumbers means to canonicalize number + // literal prefixes and exponents while printing. + // + // This value is known in and used by go/format and cmd/gofmt. + // It is currently more convenient and performant for those + // packages to apply number normalization during printing, + // rather than by modifying the AST in advance. + normalizeNumbers Mode = 1 << 30 +) + +// A Config node controls the output of Fprint. +type Config struct { + Mode Mode // default: 0 + Tabwidth int // default: 8 + Indent int // default: 0 (all code is indented at least by this much) +} + +// fprint implements Fprint and takes a nodesSizes map for setting up the printer state. +func (cfg *Config) fprint(output io.Writer, fset *token.FileSet, node interface{}, nodeSizes map[ast.Node]int) (err error) { + // print node + var p printer + p.init(cfg, fset, nodeSizes) + if err = p.printNode(node); err != nil { + return + } + // print outstanding comments + p.impliedSemi = false // EOF acts like a newline + p.flush(token.Position{Offset: infinity, Line: infinity}, token.EOF) + + // output is buffered in p.output now. + // fix //go:build and // +build comments if needed. + //p.fixGoBuildLines() + + // redirect output through a trimmer to eliminate trailing whitespace + // (Input to a tabwriter must be untrimmed since trailing tabs provide + // formatting information. The tabwriter could provide trimming + // functionality but no tabwriter is used when RawFormat is set.) + output = &trimmer{output: output} + + // redirect output through a tabwriter if necessary + if cfg.Mode&RawFormat == 0 { + minwidth := cfg.Tabwidth + + padchar := byte('\t') + if cfg.Mode&UseSpaces != 0 { + padchar = ' ' + } + + twmode := tabwriter.DiscardEmptyColumns + if cfg.Mode&TabIndent != 0 { + minwidth = 0 + twmode |= tabwriter.TabIndent + } + + output = tabwriter.NewWriter(output, minwidth, cfg.Tabwidth, 1, padchar, twmode) + } + + // write printer result via tabwriter/trimmer to output + if _, err = output.Write(p.output); err != nil { + return + } + + // flush tabwriter, if any + if tw, _ := output.(*tabwriter.Writer); tw != nil { + err = tw.Flush() + } + + return +} + +// A CommentedNode bundles an AST node and corresponding comments. +// It may be provided as argument to any of the Fprint functions. +// +type CommentedNode struct { + Node interface{} // *ast.File, or ast.Expr, ast.Decl, ast.Spec, or ast.Stmt + Comments []*ast.CommentGroup +} + +// Fprint "pretty-prints" an AST node to output for a given configuration cfg. +// Position information is interpreted relative to the file set fset. +// The node type must be *ast.File, *CommentedNode, []ast.Decl, []ast.Stmt, +// or assignment-compatible to ast.Expr, ast.Decl, ast.Spec, or ast.Stmt. +// +func (cfg *Config) Fprint(output io.Writer, fset *token.FileSet, node interface{}) error { + return cfg.fprint(output, fset, node, make(map[ast.Node]int)) +} + +// Fprint "pretty-prints" an AST node to output. +// It calls Config.Fprint with default settings. +// Note that gofmt uses tabs for indentation but spaces for alignment; +// use format.Node (package go/format) for output that matches gofmt. 
+// +func Fprint(output io.Writer, fset *token.FileSet, node interface{}) error { + return (&Config{Tabwidth: 8}).Fprint(output, fset, node) +} From ae5caf119d91e0ee66cb6fec5fb4d58a88dda8a5 Mon Sep 17 00:00:00 2001 From: dylan Date: Sun, 19 Jun 2022 21:14:59 +0800 Subject: [PATCH 5/5] fix: fix api ext decl --- printer/nodes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/printer/nodes.go b/printer/nodes.go index 499e06e..98c4b7d 100644 --- a/printer/nodes.go +++ b/printer/nodes.go @@ -245,8 +245,8 @@ func (p *printer) serviceExtDecl(node *ast.ServiceExtDecl) { p.print(indent, formfeed) for _, each := range node.Kvs { //p.print(each.Pos()) - p.expr(each) p.print(newline) + p.expr(each) } p.print(unindent, formfeed, token.RPAREN) }
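
For reference, a minimal sketch of how the printer added in this series might be driven end to end. The parser entry point and the import paths below are assumptions for illustration only (these patches define the printer and token packages, not a parser or a module path), so read this as a sketch of the intended flow rather than a working program:

```go
package main

import (
	"os"

	// Assumed import paths: the module path and the parser package
	// are not established by these patches.
	"zero-api/parser"
	"zero-api/printer"
	"zero-api/token"
)

func main() {
	// token.FileSet is assumed to mirror go/token's FileSet, which the
	// printer's position handling (posFor/lineFor) is written against.
	fset := token.NewFileSet()

	// parser.ParseFile is an assumed entry point mirroring go/parser;
	// it would produce the *ast.File that printer.Fprint accepts.
	f, err := parser.ParseFile(fset, "user.api", nil, parser.ParseComments)
	if err != nil {
		panic(err)
	}

	// Same configuration as the package-level printer.Fprint; add
	// printer.UseSpaces or printer.SourcePos to Mode as needed.
	cfg := &printer.Config{Tabwidth: 8}
	if err := cfg.Fprint(os.Stdout, fset, f); err != nil {
		panic(err)
	}
}
```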