Skip to content

Commit

Permalink
feat(compiler): add offset encoding option
Browse files Browse the repository at this point in the history
  • Loading branch information
Myriad-Dreamin committed Nov 4, 2023
1 parent 206d7c8 commit 33eaebf
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 43 deletions.
2 changes: 1 addition & 1 deletion compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ sha2.workspace = true
flate2.workspace = true
ecow.workspace = true
instant.workspace = true
strum = { version = "0.25.0", features = ["derive"] }
strum.workspace = true

serde.workspace = true
serde_json.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub mod vfs;
pub mod world;

/// Diff and parse the source code.
mod parser;
pub mod parser;
mod utils;

/// Convenient services over [`world::CompilerWorld`].
Expand Down
3 changes: 2 additions & 1 deletion compiler/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use typst::{diag::FileResult, syntax::Source};
use typst_ts_core::TypstFileId;

pub use semantic_tokens::{
get_semantic_tokens_full, get_semantic_tokens_legend, SemanticToken, SemanticTokensLegend,
get_semantic_tokens_full, get_semantic_tokens_legend, OffsetEncoding, SemanticToken,
SemanticTokensLegend,
};

pub fn reparse(source_id: TypstFileId, prev: Option<Source>, next: String) -> FileResult<Source> {
Expand Down
59 changes: 34 additions & 25 deletions compiler/src/parser/semantic_tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,26 @@ pub fn get_semantic_tokens_legend() -> SemanticTokensLegend {
}
}

pub fn get_semantic_tokens_full(source: &Source) -> Vec<SemanticToken> {
/// Encoding in which semantic-token positions are reported.
///
/// Passed to [`get_semantic_tokens_full`] to choose which converter resolves
/// each token's offset into a `(line, column)` position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OffsetEncoding {
    /// Resolve token positions with the UTF-8 converter.
    Utf8,
    /// Resolve token positions with the UTF-16 converter.
    Utf16,
}

pub fn get_semantic_tokens_full(source: &Source, encoding: OffsetEncoding) -> Vec<SemanticToken> {
let root = LinkedNode::new(source.root());
let mut full = tokenize_tree(source, &root, ModifierSet::empty());
let mut full = tokenize_tree(&root, ModifierSet::empty());

let mut init = (0, 0);
for token in full.iter_mut() {
// resolve offset to position
let offset = ((token.delta_line as u64) << 32) | token.delta_start_character as u64;
let position = (match encoding {
OffsetEncoding::Utf8 => offset_to_position_utf8,
OffsetEncoding::Utf16 => offset_to_position_utf16,
})(offset as usize, source);
token.delta_line = position.0;
token.delta_start_character = position.1;

let next = (token.delta_line, token.delta_start_character);
token.delta_line -= init.0;
if token.delta_line == 0 {
Expand All @@ -50,30 +64,22 @@ pub fn get_semantic_tokens_full(source: &Source) -> Vec<SemanticToken> {
full
}

fn tokenize_single_node(
ctx: &Source,
node: &LinkedNode,
modifiers: ModifierSet,
) -> Option<SemanticToken> {
fn tokenize_single_node(node: &LinkedNode, modifiers: ModifierSet) -> Option<SemanticToken> {
let is_leaf = node.children().next().is_none();

token_from_node(node)
.or_else(|| is_leaf.then_some(TokenType::Text))
.map(|token_type| SemanticToken::new(ctx, token_type, modifiers, node))
.map(|token_type| SemanticToken::new(token_type, modifiers, node))
}

/// Tokenize a node and its children
fn tokenize_tree(
ctx: &Source,
root: &LinkedNode<'_>,
parent_modifiers: ModifierSet,
) -> Vec<SemanticToken> {
fn tokenize_tree(root: &LinkedNode<'_>, parent_modifiers: ModifierSet) -> Vec<SemanticToken> {
let modifiers = parent_modifiers | modifiers_from_node(root);

let token = tokenize_single_node(ctx, root, modifiers).into_iter();
let token = tokenize_single_node(root, modifiers).into_iter();
let children = root
.children()
.flat_map(move |child| tokenize_tree(ctx, &child, modifiers));
.flat_map(move |child| tokenize_tree(&child, modifiers));
token.chain(children).collect()
}

Expand All @@ -86,21 +92,17 @@ pub struct SemanticToken {
}

impl SemanticToken {
pub fn new(
ctx: &Source,
token_type: TokenType,
modifiers: ModifierSet,
node: &LinkedNode,
) -> Self {
fn new(token_type: TokenType, modifiers: ModifierSet, node: &LinkedNode) -> Self {
let source = node.get().clone().into_text();

let position = offset_to_position(node.offset(), ctx);
let raw_position = node.offset() as u64;
let raw_position = ((raw_position >> 32) as u32, raw_position as u32);

Self {
token_type: token_type as u32,
token_modifiers: modifiers.bitset(),
delta_line: position.0,
delta_start_character: position.1,
delta_line: raw_position.0,
delta_start_character: raw_position.1,
length: source.chars().map(char::len_utf16).sum::<usize>() as u32,
}
}
Expand Down Expand Up @@ -202,7 +204,14 @@ fn token_from_hashtag(hashtag: &LinkedNode) -> Option<TokenType> {
.and_then(token_from_node)
}

fn offset_to_position(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
/// Converts a byte offset in `typst_source` into a `(line, column)` pair
/// whose column is measured in UTF-8 code units (bytes).
///
/// The line index is taken from `Source::byte_to_line`; the column is the
/// number of bytes between the start of that line and `typst_offset`.
///
/// # Panics
///
/// Panics if the source cannot map `typst_offset` to a line, or the resulting
/// line index back to a byte position.
fn offset_to_position_utf8(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
    let line_index = typst_source
        .byte_to_line(typst_offset)
        .expect("token offset must lie within the source");
    let line_start = typst_source
        .line_to_byte(line_index)
        .expect("line index returned by byte_to_line must be valid");

    // `Source::byte_to_column` counts characters, which differs from the byte
    // count on any line containing non-ASCII text. A UTF-8 position is a byte
    // distance from the line start, so compute it directly.
    let column_index = typst_offset - line_start;

    // Narrowing casts are kept as in the original: positions are stored as u32.
    (line_index as u32, column_index as u32)
}

fn offset_to_position_utf16(typst_offset: usize, typst_source: &Source) -> (u32, u32) {
let line_index = typst_source.byte_to_line(typst_offset).unwrap();

let lsp_line = line_index as u32;
Expand Down
27 changes: 16 additions & 11 deletions compiler/src/world.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ use crate::{
dependency::{DependencyTree, DependentFileInfo},
package::Registry as PackageRegistry,
parser::{
get_semantic_tokens_full, get_semantic_tokens_legend, SemanticToken, SemanticTokensLegend,
get_semantic_tokens_full, get_semantic_tokens_legend, OffsetEncoding, SemanticToken,
SemanticTokensLegend,
},
service::WorkspaceProvider,
time::SystemTime,
Expand Down Expand Up @@ -207,17 +208,21 @@ impl<F: CompilerFeat> CompilerWorld<F> {
Arc::new(get_semantic_tokens_legend())
}

pub fn get_semantic_tokens(&self, file_path: Option<String>) -> Arc<Vec<SemanticToken>> {
Arc::new(get_semantic_tokens_full(
&file_path
.and_then(|e| {
let relative_path = Path::new(&e).strip_prefix(&self.workspace_root()).ok()?;
pub fn get_semantic_tokens(
&self,
file_path: Option<String>,
encoding: OffsetEncoding,
) -> Arc<Vec<SemanticToken>> {
let src = &file_path
.and_then(|e| {
let relative_path = Path::new(&e).strip_prefix(&self.workspace_root()).ok()?;

let source_id = FileId::new(None, VirtualPath::new(relative_path));
self.source(source_id).ok()
})
.unwrap_or_else(|| self.main());

let source_id = FileId::new(None, VirtualPath::new(relative_path));
self.source(source_id).ok()
})
.unwrap_or_else(|| self.main()),
))
Arc::new(get_semantic_tokens_full(src, encoding))
}

fn map_source_or_default<T>(
Expand Down
13 changes: 12 additions & 1 deletion packages/compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub use typst_ts_compiler::*;
use typst_ts_compiler::{
font::web::BrowserFontSearcher,
package::browser::ProxyRegistry,
parser::OffsetEncoding,
service::{CompileDriverImpl, Compiler},
vfs::browser::ProxyAccessModel,
world::WorldSnapshot,
Expand Down Expand Up @@ -206,6 +207,7 @@ impl TypstCompiler {

pub fn get_semantic_tokens(
&mut self,
offset_encoding: String,
file_path: Option<String>,
result_id: Option<String>,
) -> Result<js_sys::Object, JsValue> {
Expand All @@ -215,7 +217,16 @@ impl TypstCompiler {
);
}

let tokens = self.compiler.world_mut().get_semantic_tokens(file_path);
let tokens = self.compiler.world_mut().get_semantic_tokens(
file_path,
match offset_encoding.as_str() {
"utf-16" => OffsetEncoding::Utf16,
"utf-8" => OffsetEncoding::Utf8,
_ => {
return Err(error_once!("Unsupported offset encoding", offset_encoding: offset_encoding).into());
}
},
);
let mut result = Vec::new();
for token in tokens.iter() {
result.push(token.delta_line);
Expand Down
28 changes: 25 additions & 3 deletions packages/typst.ts/src/compiler.mts
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,20 @@ export interface TypstCompiler {
/**
* experimental
* See Semantic tokens: https://github.com/microsoft/vscode/issues/86415
*
* @param {string} opts.mainFilePath - The path of the main file.
* @param {string} opts.resultId - The id of the result.
* @param {string} opts.offsetEncoding - The encoding used to measure token offsets.
* - 'utf-16': offsets are measured in UTF-16.
* - 'utf-8': offsets are measured in UTF-8.
* Falls back to 'utf-16' when omitted.
* @default 'utf-16'
* @returns {Promise<SemanticTokens>} - The semantic tokens.
*/
getSemanticTokens(opts: { mainFilePath: string; resultId?: string }): Promise<SemanticTokens>;
getSemanticTokens(opts: {
mainFilePath: string;
resultId?: string;
offsetEncoding?: string;
}): Promise<SemanticTokens>;
}

const gCompilerModule = new LazyWasmModule(async (bin?: any) => {
Expand Down Expand Up @@ -193,10 +205,20 @@ class TypstCompilerDriver {
});
}

getSemanticTokens(opts: { mainFilePath: string; resultId?: string }): Promise<SemanticTokens> {
getSemanticTokens(opts: {
mainFilePath: string;
resultId?: string;
offsetEncoding?: string;
}): Promise<SemanticTokens> {
return new Promise<SemanticTokens>(resolve => {
this.compiler.reset();
resolve(this.compiler.get_semantic_tokens(opts.mainFilePath, opts.resultId) as any);
resolve(
this.compiler.get_semantic_tokens(
opts.offsetEncoding || 'utf-16',
opts.mainFilePath,
opts.resultId,
) as any,
);
});
}

Expand Down
2 changes: 2 additions & 0 deletions packages/typst.ts/src/contrib/snippet.mts
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ export class TypstSnippet {

/**
* Get semantic tokens for the document.
* See {@link SweetCompileOptions}.
* See {@link TypstCompiler#getSemanticTokens}.
*/
async getSemanticTokens(
opts: SweetCompileOptions & { resultId?: string },
Expand Down

0 comments on commit 33eaebf

Please sign in to comment.