Skip to content

Commit

Permalink
wip: kernel computation feature (#27)
Browse files Browse the repository at this point in the history
* fix: improve data parser

* feat: init kernel computing feature powered by duckdb
  • Loading branch information
bruceyyu authored Aug 28, 2024
1 parent f93a1e6 commit 980498a
Show file tree
Hide file tree
Showing 12 changed files with 284 additions and 48 deletions.
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GWalkR
Title: Interactive Exploratory Data Analysis Tool
Version: 0.1.5
Version: 0.2.0
Authors@R: c(
person("Yue", "Yu", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9302-0793")),
Expand All @@ -17,4 +17,7 @@ Imports:
htmlwidgets,
jsonlite,
openssl,
shiny
shiny,
shinycssloaders,
DBI,
duckdb
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
export(gwalkr)
export(gwalkrOutput)
export(renderGwalkr)
import(DBI)
import(duckdb)
import(htmlwidgets)
import(openssl)
import(shiny)
import(shinycssloaders)
importFrom(jsonlite,toJSON)
56 changes: 56 additions & 0 deletions R/duckdb_utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
library(DBI)
library(duckdb)

# Private environment used as mutable state by the helpers in this file: the
# active DuckDB connection is stored in it under the name `con`.
my_env <- new.env()

# Open a fresh in-memory DuckDB connection, remember it in `my_env$con`, and
# load `df` into it as the table "gwalkr_mid_table".
#
# @param df Object coercible with `as.data.frame()`; becomes the table content.
# @return The (invisible) result of `DBI::dbWriteTable()`.
duckdb_register_con <- function(df) {
  # A connection left over from an earlier call would otherwise be orphaned
  # (and leaked) once `my_env$con` is overwritten below, so close it first.
  previous <- my_env$con
  if (!is.null(previous) && DBI::dbIsValid(previous)) {
    DBI::dbDisconnect(previous)
  }

  my_env$con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
  DBI::dbWriteTable(
    my_env$con,
    "gwalkr_mid_table",
    as.data.frame(df),
    overwrite = FALSE
  )
}

# Close the DuckDB connection stored in `my_env` (if any) and forget it.
#
# The `con` binding is removed rather than set to NULL: assigning NULL keeps
# the name defined, so `exists("con", envir = my_env)` would keep reporting a
# connection that is no longer usable.
#
# @param df Unused; kept (now with a default) so existing calls, with or
#   without an argument, continue to work.
# @return `NULL`, invisibly.
duckdb_unregister_con <- function(df = NULL) {
  if (exists("con", envir = my_env, inherits = FALSE)) {
    con <- my_env$con
    # Drop the binding first so the state is cleared even if disconnecting
    # signals an error.
    rm("con", envir = my_env)
    if (!is.null(con)) {
      DBI::dbDisconnect(con)
    }
  }
  invisible(NULL)
}

# Describe the columns of "gwalkr_mid_table" as a list of
# `list(key = <column name>, type = <meta type>)` entries, using
# `get_data_meta_type()` on a one-row sample of the table.
#
# The sample is classified even when the table has no rows: previously that
# case fell through and returned NULL without any signal to the caller.
# NOTE(review): this relies on the driver returning typed columns for a
# zero-row result -- confirm with the duckdb R client.
#
# @return A list with one entry per column.
# Stops with "Database connection not found." when no connection is
# registered.
duckdb_get_field_meta <- function() {
  # `my_env$con` does not search parent environments and is NULL both when
  # the binding is missing and when it was reset to NULL, unlike `exists()`.
  con <- my_env$con
  if (is.null(con)) {
    stop("Database connection not found.")
  }

  sample_row <- DBI::dbGetQuery(con, "SELECT * FROM gwalkr_mid_table LIMIT 1")
  get_data_meta_type(sample_row)
}

# Run `sql` against the registered DuckDB connection and return the result.
#
# A query that matches no rows now returns the zero-row data frame produced by
# `DBI::dbGetQuery()` instead of NULL, so callers that serialise the result
# get an empty array rather than an empty object.
#
# NOTE(review): `sql` is executed verbatim. In this package it originates from
# an HTTP query string, so anything able to reach that endpoint can run
# arbitrary statements on this connection -- confirm that is acceptable.
#
# @param sql A single SQL statement as a character string.
# @return A data frame with the query result (possibly with zero rows).
# Stops with "Database connection not found." when no connection is
# registered.
duckdb_get_data <- function(sql) {
  # `my_env$con` does not search parent environments and is NULL both when
  # the binding is missing and when it was reset to NULL, unlike `exists()`.
  con <- my_env$con
  if (is.null(con)) {
    stop("Database connection not found.")
  }

  DBI::dbGetQuery(con, sql)
}

# Classify every column of `data` into one of the meta types
# "datetime_tz", "datetime", "number" or "string".
#
# A POSIXct column is "datetime_tz" when it carries a `tzone` attribute and
# "datetime" otherwise; any other numeric column is "number"; everything else
# is "string".
#
# @param data A data frame (or other named list of columns).
# @return An unnamed list with one `list(key = <name>, type = <meta type>)`
#   entry per column, in column order; an empty list when `data` has no names.
get_data_meta_type <- function(data) {
  # lapply builds the result in one pass instead of re-copying a growing list
  # with append() on every iteration.
  lapply(names(data), function(key) {
    value <- data[[key]]
    field_meta_type <- if (inherits(value, "POSIXct")) {
      if (is.null(attr(value, "tzone"))) "datetime" else "datetime_tz"
    } else if (is.numeric(value)) {
      "number"
    } else {
      "string"
    }
    list(key = key, type = field_meta_type)
  })
}
58 changes: 34 additions & 24 deletions R/gwalkr.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@
#'
#' @import htmlwidgets
#' @import openssl
#' @importFrom jsonlite toJSON
#' @import shiny
#' @import shinycssloaders
#' @import DBI
#' @import duckdb
#'
#' @param data A data frame to be visualized in the GWalkR. The data frame should not be empty.
#' @param lang A character string specifying the language for the widget. Possible values are "en" (default), "ja", "zh".
#' @param dark A character string specifying the dark mode preference. Possible values are "light" (default), "dark", "media".
#' @param columnSpecs An optional list of lists to manually specify the types of some columns in the data frame.
#' Each top level element in the list corresponds to a column, and the list assigned to each column should have
#' two elements: `analyticalType` and `semanticType`. `analyticalType` can
#' only be one of "measure" or "dimension". `semanticType` can only be one of
#' @param columnSpecs An optional list of lists to manually specify the types of some columns in the data frame.
#' Each top level element in the list corresponds to a column, and the list assigned to each column should have
#' two elements: `analyticalType` and `semanticType`. `analyticalType` can
#' only be one of "measure" or "dimension". `semanticType` can only be one of
#' "quantitative", "temporal", "nominal" or "ordinal". For example:
#' \code{list(
#' "gender" = list(analyticalType = "dimension", semanticType = "nominal"),
Expand All @@ -28,42 +33,47 @@
#' gwalkr(mtcars)
#'
#' @export
gwalkr <- function(data, lang = "en", dark = "light", columnSpecs = list(), visConfig = NULL, visConfigFile = NULL, toolbarExclude = list()) {
gwalkr <- function(data, lang = "en", dark = "light", columnSpecs = list(), visConfig = NULL, visConfigFile = NULL, toolbarExclude = list(), useKernel = FALSE) {
if (!is.data.frame(data)) stop("data must be a data frame")
if (!is.null(visConfig) && !is.null(visConfigFile)) stop("visConfig and visConfigFile are mutually exclusive")
lang <- match.arg(lang, choices = c("en", "ja", "zh"))

rawFields <- raw_fields(data, columnSpecs)
colnames(data) <- sapply(colnames(data), fname_encode)

if (!is.null(visConfigFile)) {
visConfig <- readLines(visConfigFile, warn=FALSE)
}
# forward options using x
x = list(
dataSource = jsonlite::toJSON(data),
rawFields = rawFields,
i18nLang = lang,
visSpec = visConfig,
dark = dark,
toolbarExclude = toolbarExclude
)

# create widget
htmlwidgets::createWidget(
name = 'gwalkr',
x,
package = 'GWalkR',
width='100%',
height='100%'
)
if (useKernel) {
gwalkr_kernel(data, lang, dark, rawFields, visConfig, toolbarExclude)
} else {
x = list(
dataSource = toJSON(data),
rawFields = rawFields,
i18nLang = lang,
visSpec = visConfig,
dark = dark,
toolbarExclude = toolbarExclude,
useKernel = FALSE
)

# create widget
htmlwidgets::createWidget(
name = 'gwalkr',
x,
package = 'GWalkR',
width='100%',
height='100%'
)
}
}

#' Shiny bindings for gwalkr
#'
#' Output and render functions for using gwalkr within Shiny
#' applications and interactive Rmd documents.
#'
#'
#' @import shiny
#'
#' @param outputId output variable to read from
Expand Down
69 changes: 69 additions & 0 deletions R/gwalkr_kernel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Build the "kernel mode" Shiny app: the data is loaded into DuckDB on the R
# side and the widget fetches query results over a per-session HTTP endpoint
# instead of receiving the whole data set up front.
#
# @param data Data frame to explore; loaded into DuckDB by
#   `duckdb_register_con()` when a session starts.
# @param lang,dark,rawFields,visConfig,toolbarExclude Forwarded unchanged to
#   the widget as `i18nLang`, `dark`, `rawFields`, `visSpec` and
#   `toolbarExclude`.
# @return The Shiny app object when R is interactive, otherwise `NULL`
#   (invisibly).
gwalkr_kernel <- function(data, lang, dark, rawFields, visConfig,
                          toolbarExclude) {
  message("GWalkR kernel mode init...")

  # Handler for the registered data endpoint: runs the SQL passed in the `sql`
  # query parameter and answers with the result as JSON.
  # NOTE(review): the SQL comes straight from the request and is executed
  # as-is; confirm the endpoint is only reachable by the trusted local widget.
  filter_func <- function(data, req) {
    query <- parseQueryString(req$QUERY_STRING)
    # `[[` instead of `$`: `$` partial-matches list names, so a parameter such
    # as `sqlx` would otherwise be picked up as `sql`.
    sql <- query[["sql"]]

    if (is.null(sql) || !nzchar(sql)) {
      return(httpResponse(
        status = 400L,
        content_type = "application/json",
        content = toJSON(
          list(error = "Missing `sql` query parameter."),
          auto_unbox = TRUE
        )
      ))
    }

    res <- duckdb_get_data(sql)

    json <- toJSON(
      res,
      auto_unbox = TRUE
    )

    httpResponse(
      status = 200L,
      content_type = "application/json",
      content = json
    )
  }

  # `.rs.invokeShinyPaneViewer` only exists inside RStudio; referencing it
  # directly fails with "object not found" everywhere else. Look it up
  # dynamically and fall back to Shiny's own default otherwise.
  pane_viewer <- get0(".rs.invokeShinyPaneViewer", mode = "function")
  launch_browser <- if (is.null(pane_viewer)) {
    getOption("shiny.launch.browser", interactive())
  } else {
    pane_viewer
  }

  app <- shinyApp(
    ui = fluidPage(
      shinycssloaders::withSpinner(
        gwalkrOutput("gwalkr_kernel"),
        proxy.height = "400px"
      )
    ),

    server = function(input, output, session) {
      # URL of the per-session endpoint served by `filter_func`.
      path <- session$registerDataObj(
        "GWALKR",
        NULL,
        filter_func
      )

      # NOTE(review): the connection is kept in file-level state, so every
      # session registers into (and on exit closes) the same slot -- confirm
      # that only one session at a time is expected.
      duckdb_register_con(data)
      fieldMetas <- duckdb_get_field_meta()

      x <- list(
        rawFields = rawFields,
        i18nLang = lang,
        visSpec = visConfig,
        dark = dark,
        toolbarExclude = toolbarExclude,
        useKernel = TRUE,
        fieldMetas = fieldMetas,
        endpointPath = path
      )

      output$gwalkr_kernel <- renderGwalkr({
        htmlwidgets::createWidget(
          name = "gwalkr",
          x,
          package = "GWalkR",
          width = "100%",
          height = "100%"
        )
      })

      session$onSessionEnded(function() {
        message("GwalkR closed")
        duckdb_unregister_con()
      })
    },

    options = list(launch.browser = launch_browser)
  )

  if (interactive()) app
}
3 changes: 2 additions & 1 deletion man/gwalkr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion web_app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
},
"dependencies": {
"@kanaries/graphic-walker": "^0.4.70",
"@kanaries/gw-dsl-parser": "^0.1.49",
"@rollup/plugin-commonjs": "^25.0.2",
"@rollup/plugin-replace": "^5.0.2",
"@rollup/plugin-terser": "^0.4.3",
"@rollup/plugin-typescript": "^11.1.2",
"mobx-react-lite": "^3.4.3",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"styled-components": "^5.3.6"
"styled-components": "^5.3.6",
"vite-plugin-wasm": "^3.3.0"
},
"devDependencies": {
"@types/react": "^18.2.14",
Expand Down
31 changes: 31 additions & 0 deletions web_app/src/dataSource/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import type { IDataQueryPayload, IRow } from "@kanaries/graphic-walker/interfaces";
import { parser_dsl_with_meta } from "@kanaries/gw-dsl-parser";

const DEFAULT_LIMIT = 50_000;

/**
 * Send a SQL string to the R-side endpoint and resolve with the parsed JSON body.
 *
 * The SQL is appended as an extra `sql` query parameter.
 * NOTE(review): the `&` assumes `endpointPath` already contains a query string — confirm
 * against the path the R side registers.
 *
 * Rejects when the request fails, when the server answers with a non-2xx status, or when the
 * body is not valid JSON; the error is logged before being re-thrown.
 */
const sendHTTPData = async (sql: string, endpointPath: string): Promise<unknown> => {
    try {
        const response = await fetch(`${endpointPath}&sql=${encodeURIComponent(sql)}`);
        // fetch() only rejects on network failure, so HTTP errors must be checked explicitly.
        if (!response.ok) {
            throw new Error(`Kernel query failed with HTTP ${response.status} ${response.statusText}`);
        }
        return await response.json();
    } catch (error) {
        console.error("Error:", error);
        throw error;
    }
};

/**
 * Create the computation callback used in kernel mode.
 *
 * The returned function turns a query payload into SQL over the table
 * `gwalkr_mid_table` (via `parser_dsl_with_meta`), sends it to `endpointPath` and resolves
 * with the rows. When the payload has no `limit`, `DEFAULT_LIMIT` is applied.
 *
 * @param fieldMetas column descriptors (`key`/`type`) passed to the parser as the table's metadata
 * @param endpointPath URL of the endpoint that executes the SQL
 * @returns an async function mapping a payload to its result rows; any response that is not a
 *          JSON array (for example an empty object for an empty result) yields `[]`
 */
export function getDataFromKernelBySql(fieldMetas: { key: string; type: string }[], endpointPath: string) {
    // The table metadata does not depend on the payload, so serialise it once.
    const datasetMeta = JSON.stringify({ gwalkr_mid_table: fieldMetas });

    return async (payload: IDataQueryPayload): Promise<IRow[]> => {
        const sql = parser_dsl_with_meta(
            "gwalkr_mid_table",
            JSON.stringify({ ...payload, limit: payload.limit ?? DEFAULT_LIMIT }),
            datasetMeta
        );
        const result = await sendHTTPData(sql, endpointPath);
        // Guard the cast: `??` only covers null/undefined, not other non-array JSON values.
        return Array.isArray(result) ? (result as IRow[]) : [];
    };
}
67 changes: 53 additions & 14 deletions web_app/src/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@ import CodeExportModal from "./components/codeExportModal";
import { StyleSheetManager } from "styled-components";
import tailwindStyle from "tailwindcss/tailwind.css?inline";
import formatSpec from "./utils/formatSpec";
import { getDataFromKernelBySql } from "./dataSource";

import initDslParser from "@kanaries/gw-dsl-parser";
import wasmPath from "@kanaries/gw-dsl-parser/gw_dsl_parser_bg.wasm?url";

const App: React.FC<IAppProps> = observer((propsIn) => {
const { dataSource, visSpec, rawFields, toolbarExclude, ...props } = propsIn;
const { dataSource, visSpec, rawFields, toolbarExclude, useKernel, ...props } = propsIn;
const storeRef = React.useRef<VizSpecStore | null>(null);

const specList = visSpec ? formatSpec(JSON.parse(visSpec) as any[], rawFields) : undefined;
Expand All @@ -25,14 +29,34 @@ const App: React.FC<IAppProps> = observer((propsIn) => {
exclude: toolbarExclude ? [...toolbarExclude, "export_code"] : ["export_code"],
extra: tools,
};
return (
<React.StrictMode>
<div className="h-full w-full overflow-y-scroll font-sans">
<CodeExportModal open={exportOpen} setOpen={setExportOpen} globalStore={storeRef} />
<GraphicWalker {...props} storeRef={storeRef} data={dataSource} toolbar={toolbarConfig} fields={rawFields} chart={specList} />
</div>
</React.StrictMode>
);

if (useKernel) {
const { endpointPath, fieldMetas } = propsIn;
return (
<React.StrictMode>
<div className="h-full w-full overflow-y-scroll font-sans">
<CodeExportModal open={exportOpen} setOpen={setExportOpen} globalStore={storeRef} />
<GraphicWalker
{...props}
storeRef={storeRef}
toolbar={toolbarConfig}
fields={rawFields}
chart={specList}
computation={getDataFromKernelBySql(fieldMetas, endpointPath)}
/>
</div>
</React.StrictMode>
);
} else {
return (
<React.StrictMode>
<div className="h-full w-full overflow-y-scroll font-sans">
<CodeExportModal open={exportOpen} setOpen={setExportOpen} globalStore={storeRef} />
<GraphicWalker {...props} storeRef={storeRef} data={dataSource} toolbar={toolbarConfig} fields={rawFields} chart={specList} />
</div>
</React.StrictMode>
);
}
});

const GWalkR = (props: IAppProps, id: string) => {
Expand All @@ -46,11 +70,26 @@ const GWalkR = (props: IAppProps, id: string) => {
shadowRoot.appendChild(styleElement);

const root = createRoot(shadowRoot);
root.render(
<StyleSheetManager target={shadowRoot}>
<App {...props} />
</StyleSheetManager>
);

if (props.useKernel) {
initDslParser(wasmPath)
.then(() => {
root.render(
<StyleSheetManager target={shadowRoot}>
<App {...props} />
</StyleSheetManager>
);
})
.catch((e) => {
console.error(e);
});
} else {
root.render(
<StyleSheetManager target={shadowRoot}>
<App {...props} />
</StyleSheetManager>
);
}
}
// If you want to execute GWalkR after the document has loaded, you can do it here.
// But remember, you will need to provide the 'props' and 'id' parameters.
Expand Down
Loading

0 comments on commit 980498a

Please sign in to comment.