diff --git a/Cargo.toml b/Cargo.toml
index 659b4817..19065695 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -82,6 +82,7 @@ parking_lot = "0.12.1"
 pollster = "0.3.0"
 rayon = "1.7.0"
 tokio = { version = "1.28.1", features = ["full"] }
+strum = { version = "0.25.0", features = ["derive"] }
 
 # data structure and algorithm
 append-only-vec = "0.1.2"
diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml
index 776d29aa..6b82a8e2 100644
--- a/compiler/Cargo.toml
+++ b/compiler/Cargo.toml
@@ -23,7 +23,7 @@ sha2.workspace = true
 flate2.workspace = true
 ecow.workspace = true
 instant.workspace = true
-strum = { version = "0.25.0", features = ["derive"] }
+strum.workspace = true
 serde.workspace = true
 serde_json.workspace = true
diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs
index 3611783d..4b25cd14 100644
--- a/compiler/src/lib.rs
+++ b/compiler/src/lib.rs
@@ -41,7 +41,7 @@ pub mod vfs;
 pub mod world;
 
 /// Diff and parse the source code.
-mod parser;
+pub mod parser;
 mod utils;
 
 /// Convenient services over [`world::CompilerWorld`].
diff --git a/compiler/src/parser/mod.rs b/compiler/src/parser/mod.rs
index f04cf259..9e0dfd3b 100644
--- a/compiler/src/parser/mod.rs
+++ b/compiler/src/parser/mod.rs
@@ -8,7 +8,8 @@ use typst::{diag::FileResult, syntax::Source};
 use typst_ts_core::TypstFileId;
 
 pub use semantic_tokens::{
-    get_semantic_tokens_full, get_semantic_tokens_legend, SemanticToken, SemanticTokensLegend,
+    get_semantic_tokens_full, get_semantic_tokens_legend, OffsetEncoding, SemanticToken,
+    SemanticTokensLegend,
 };
 
 pub fn reparse(source_id: TypstFileId, prev: Option<Source>, next: String) -> FileResult<Source> {
diff --git a/compiler/src/parser/semantic_tokens.rs b/compiler/src/parser/semantic_tokens.rs
index 0601e4ec..38596b49 100644
--- a/compiler/src/parser/semantic_tokens.rs
+++ b/compiler/src/parser/semantic_tokens.rs
@@ -33,12 +33,26 @@ pub fn get_semantic_tokens_legend() -> SemanticTokensLegend {
     }
 }
 
-pub fn get_semantic_tokens_full(source: &Source) -> Vec<SemanticToken> {
+pub enum OffsetEncoding {
+    Utf8,
+    Utf16,
+}
+
+pub fn get_semantic_tokens_full(source: &Source, encoding: OffsetEncoding) -> Vec<SemanticToken> {
     let root = LinkedNode::new(source.root());
-    let mut full = tokenize_tree(source, &root, ModifierSet::empty());
+    let mut full = tokenize_tree(&root, ModifierSet::empty());
 
     let mut init = (0, 0);
     for token in full.iter_mut() {
+        // resolve offset to position
+        let offset = ((token.delta_line as u64) << 32) | token.delta_start_character as u64;
+        let position = (match encoding {
+            OffsetEncoding::Utf8 => offset_to_position_utf8,
+            OffsetEncoding::Utf16 => offset_to_position_utf16,
+        })(offset as usize, source);
+        token.delta_line = position.0;
+        token.delta_start_character = position.1;
+
         let next = (token.delta_line, token.delta_start_character);
         token.delta_line -= init.0;
         if token.delta_line == 0 {
@@ -50,30 +64,22 @@ pub fn get_semantic_tokens_full(source: &Source) -> Vec<SemanticToken> {
     full
 }
 
-fn tokenize_single_node(
-    ctx: &Source,
-    node: &LinkedNode,
-    modifiers: ModifierSet,
-) -> Option<SemanticToken> {
+fn tokenize_single_node(node: &LinkedNode, modifiers: ModifierSet) -> Option<SemanticToken> {
     let is_leaf = node.children().next().is_none();
 
     token_from_node(node)
         .or_else(|| is_leaf.then_some(TokenType::Text))
-        .map(|token_type| SemanticToken::new(ctx, token_type, modifiers, node))
+        .map(|token_type| SemanticToken::new(token_type, modifiers, node))
 }
 
 /// Tokenize a node and its children
-fn tokenize_tree(
-    ctx: &Source,
-    root: &LinkedNode<'_>,
-    parent_modifiers: ModifierSet,
-) -> Vec<SemanticToken> {
+fn tokenize_tree(root: &LinkedNode<'_>, parent_modifiers: ModifierSet) -> Vec<SemanticToken> {
     let modifiers = parent_modifiers | modifiers_from_node(root);
 
-    let token = tokenize_single_node(ctx, root, modifiers).into_iter();
+    let token = tokenize_single_node(root, modifiers).into_iter();
     let children = root
         .children()
-        .flat_map(move |child| tokenize_tree(ctx, &child, modifiers));
+        .flat_map(move |child| tokenize_tree(&child, modifiers));
     token.chain(children).collect()
 }
 
@@ -86,21 +92,17 @@ pub struct SemanticToken {
 }
 
 impl SemanticToken {
-    pub fn new(
-        ctx: &Source,
-        token_type: TokenType,
-        modifiers: ModifierSet,
-        node: &LinkedNode,
-    ) -> Self {
+    fn new(token_type: TokenType, modifiers: ModifierSet, node: &LinkedNode) -> Self {
         let source = node.get().clone().into_text();
 
-        let position = offset_to_position(node.offset(), ctx);
+        let raw_position = node.offset() as u64;
+        let raw_position = ((raw_position >> 32) as u32, raw_position as u32);
 
         Self {
             token_type: token_type as u32,
             token_modifiers: modifiers.bitset(),
-            delta_line: position.0,
-            delta_start_character: position.1,
+            delta_line: raw_position.0,
+            delta_start_character: raw_position.1,
             length: source.chars().map(char::len_utf16).sum::<usize>() as u32,
         }
     }
 }
@@ -202,7 +204,14 @@ fn token_from_hashtag(hashtag: &LinkedNode) -> Option<TokenType> {
         .and_then(token_from_node)
 }
 
-fn offset_to_position(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
+fn offset_to_position_utf8(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
+    let line_index = typst_source.byte_to_line(typst_offset).unwrap();
+    let column_index = typst_source.byte_to_column(typst_offset).unwrap();
+
+    (line_index as u32, column_index as u32)
+}
+
+fn offset_to_position_utf16(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
     let line_index = typst_source.byte_to_line(typst_offset).unwrap();
 
     let lsp_line = line_index as u32;
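
Review note: the hunks above defer encoding-dependent work by smuggling the raw byte offset through the two `u32` delta fields. `SemanticToken::new` packs `node.offset()` into (`delta_line`, `delta_start_character`) as the high and low halves of a `u64`, and `get_semantic_tokens_full` later unpacks it and resolves it to a line/column pair in the requested `OffsetEncoding` before computing the LSP-style deltas. A minimal, self-contained sketch of that round trip (the function names here are illustrative, not part of the patch):

```rust
// Pack a byte offset into two u32 halves, as SemanticToken::new does above.
fn pack(offset: usize) -> (u32, u32) {
    let raw = offset as u64;
    ((raw >> 32) as u32, raw as u32)
}

// Recover the offset, as the loop in get_semantic_tokens_full does.
fn unpack(hi: u32, lo: u32) -> usize {
    (((hi as u64) << 32) | lo as u64) as usize
}

fn main() {
    let offset = 4242usize;
    let (hi, lo) = pack(offset);
    assert_eq!(unpack(hi, lo), offset);
}
```
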
diff --git a/compiler/src/world.rs b/compiler/src/world.rs
index be0de5eb..ae6f1f38 100644
--- a/compiler/src/world.rs
+++ b/compiler/src/world.rs
@@ -24,7 +24,8 @@ use crate::{
     dependency::{DependencyTree, DependentFileInfo},
     package::Registry as PackageRegistry,
     parser::{
-        get_semantic_tokens_full, get_semantic_tokens_legend, SemanticToken, SemanticTokensLegend,
+        get_semantic_tokens_full, get_semantic_tokens_legend, OffsetEncoding, SemanticToken,
+        SemanticTokensLegend,
     },
     service::WorkspaceProvider,
     time::SystemTime,
@@ -207,17 +208,21 @@ impl CompilerWorld {
         Arc::new(get_semantic_tokens_legend())
     }
 
-    pub fn get_semantic_tokens(&self, file_path: Option<String>) -> Arc<Vec<SemanticToken>> {
-        Arc::new(get_semantic_tokens_full(
-            &file_path
-                .and_then(|e| {
-                    let relative_path = Path::new(&e).strip_prefix(&self.workspace_root()).ok()?;
+    pub fn get_semantic_tokens(
+        &self,
+        file_path: Option<String>,
+        encoding: OffsetEncoding,
+    ) -> Arc<Vec<SemanticToken>> {
+        let src = &file_path
+            .and_then(|e| {
+                let relative_path = Path::new(&e).strip_prefix(&self.workspace_root()).ok()?;
+
+                let source_id = FileId::new(None, VirtualPath::new(relative_path));
+                self.source(source_id).ok()
+            })
+            .unwrap_or_else(|| self.main());
 
-                    let source_id = FileId::new(None, VirtualPath::new(relative_path));
-                    self.source(source_id).ok()
-                })
-                .unwrap_or_else(|| self.main()),
-        ))
+        Arc::new(get_semantic_tokens_full(src, encoding))
     }
 
     fn map_source_or_default(
diff --git a/packages/compiler/src/lib.rs b/packages/compiler/src/lib.rs
index 51624480..de36a295 100644
--- a/packages/compiler/src/lib.rs
+++ b/packages/compiler/src/lib.rs
@@ -7,6 +7,7 @@ pub use typst_ts_compiler::*;
 use typst_ts_compiler::{
     font::web::BrowserFontSearcher,
     package::browser::ProxyRegistry,
+    parser::OffsetEncoding,
     service::{CompileDriverImpl, Compiler},
     vfs::browser::ProxyAccessModel,
     world::WorldSnapshot,
@@ -206,6 +207,7 @@ impl TypstCompiler {
 
     pub fn get_semantic_tokens(
         &mut self,
+        offset_encoding: String,
         file_path: Option<String>,
         result_id: Option<String>,
     ) -> Result<js_sys::Object, JsValue> {
@@ -215,7 +217,16 @@ impl TypstCompiler {
             );
         }
 
-        let tokens = self.compiler.world_mut().get_semantic_tokens(file_path);
+        let tokens = self.compiler.world_mut().get_semantic_tokens(
+            file_path,
+            match offset_encoding.as_str() {
+                "utf-16" => OffsetEncoding::Utf16,
+                "utf-8" => OffsetEncoding::Utf8,
+                _ => {
+                    return Err(error_once!("Unsupported offset encoding", offset_encoding: offset_encoding).into());
+                }
+            },
+        );
         let mut result = Vec::new();
         for token in tokens.iter() {
             result.push(token.delta_line);
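
Review note: the wasm binding above flattens each token into consecutive `u32`s, mirroring the LSP semantic-tokens wire format. Assuming the pushes after `token.delta_line` continue in the LSP field order (delta line, delta start, length, token type, token modifiers), a consumer could recover absolute positions with a decoder along these lines (hypothetical sketch, not part of the patch):

```rust
// Hypothetical decoder for the flat u32 stream: five values per token,
// LSP delta encoding as produced by get_semantic_tokens_full.
struct AbsToken {
    line: u32,
    start: u32,
    length: u32,
    token_type: u32,
    token_modifiers: u32,
}

fn decode(data: &[u32]) -> Vec<AbsToken> {
    let (mut line, mut start) = (0u32, 0u32);
    data.chunks_exact(5)
        .map(|c| {
            line += c[0];
            // A zero line delta means "same line": the start delta is then
            // relative to the previous token's start column.
            start = if c[0] == 0 { start + c[1] } else { c[1] };
            AbsToken { line, start, length: c[2], token_type: c[3], token_modifiers: c[4] }
        })
        .collect()
}
```
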
diff --git a/packages/typst.ts/src/compiler.mts b/packages/typst.ts/src/compiler.mts
index 2758adb3..e867147c 100644
--- a/packages/typst.ts/src/compiler.mts
+++ b/packages/typst.ts/src/compiler.mts
@@ -116,8 +116,20 @@ export interface TypstCompiler {
   /**
    * experimental
    * See Semantic tokens: https://github.com/microsoft/vscode/issues/86415
+   *
+   * @param {string} opts.mainFilePath - The path of the main file.
+   * @param {string} opts.resultId - The id of the result.
+   * @param {string} opts.offsetEncoding - The encoding for token positions.
+   *   - 'utf-16': positions are counted in utf-16 code units.
+   *   - 'utf-8': positions are counted in utf-8 bytes.
+   * @default 'utf-16'
+   * @returns {Promise<SemanticTokens>} - The semantic tokens.
    */
-  getSemanticTokens(opts: { mainFilePath: string; resultId?: string }): Promise<SemanticTokens>;
+  getSemanticTokens(opts: {
+    mainFilePath: string;
+    resultId?: string;
+    offsetEncoding?: string;
+  }): Promise<SemanticTokens>;
 }
 
 const gCompilerModule = new LazyWasmModule(async (bin?: any) => {
@@ -193,10 +205,20 @@ class TypstCompilerDriver {
     });
   }
 
-  getSemanticTokens(opts: { mainFilePath: string; resultId?: string }): Promise<SemanticTokens> {
+  getSemanticTokens(opts: {
+    mainFilePath: string;
+    resultId?: string;
+    offsetEncoding?: string;
+  }): Promise<SemanticTokens> {
     return new Promise(resolve => {
       this.compiler.reset();
-      resolve(this.compiler.get_semantic_tokens(opts.mainFilePath, opts.resultId) as any);
+      resolve(
+        this.compiler.get_semantic_tokens(
+          opts.offsetEncoding || 'utf-16',
+          opts.mainFilePath,
+          opts.resultId,
+        ) as any,
+      );
     });
   }
diff --git a/packages/typst.ts/src/contrib/snippet.mts b/packages/typst.ts/src/contrib/snippet.mts
index 3f052fbb..fdf2e5e5 100644
--- a/packages/typst.ts/src/contrib/snippet.mts
+++ b/packages/typst.ts/src/contrib/snippet.mts
@@ -434,6 +434,8 @@ export class TypstSnippet {
 
   /**
    * Get semantic tokens for the document.
+   * See {@link SweetCompileOptions}.
+   * See {@link TypstCompiler#getSemanticTokens}.
    */
   async getSemanticTokens(
     opts: SweetCompileOptions & { resultId?: string },
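
Review note: with `parser` now public, the new `OffsetEncoding` parameter threads all the way down to `get_semantic_tokens_full`, so the API can also be exercised directly from Rust. A quick sketch under the assumption that `SemanticToken`'s fields stay public as in this patch; `Source::detached` builds an in-memory source, so no compiler world setup is needed:

```rust
use typst::syntax::Source;
use typst_ts_compiler::parser::{get_semantic_tokens_full, OffsetEncoding};

fn main() {
    let source = Source::detached("= Heading\nHello *world*!");
    // Positions come back as utf-16 line/column deltas, matching what the
    // wasm binding forwards when the JS side passes 'utf-16' (the default).
    for t in get_semantic_tokens_full(&source, OffsetEncoding::Utf16) {
        println!(
            "dl={} dc={} len={} type={} mods={}",
            t.delta_line, t.delta_start_character, t.length, t.token_type, t.token_modifiers
        );
    }
}
```
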