Skip to content

Commit

Permalink
Merge pull request #10 from iwilltry42/feat/nbformat
Browse files Browse the repository at this point in the history
fix: nbformat
  • Loading branch information
hupe1980 authored Aug 5, 2024
2 parents 7d4ed8b + ecb30e6 commit 635f935
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 11 deletions.
3 changes: 1 addition & 2 deletions documentloader/notebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"fmt"
"io"
"strings"

"github.com/hupe1980/golc/integration/nbformat"
"github.com/hupe1980/golc/schema"
Expand Down Expand Up @@ -75,7 +74,7 @@ func (l *Notebook) Load(ctx context.Context) ([]schema.Document, error) {
pageContent += fmt.Sprintf("'%s' cell: '%s'\n, gives error '%s', with description '%s'\n\n", c.CellType, c.Source, eName, eValue)
}
} else if c.Outputs[0].OutputType == "stream" {
output := strings.Join(c.Outputs[0].Text, "")
output := c.Outputs[0].Text
minOutput := len(output)

if minOutput > int(l.opts.MaxOutputLength) {
Expand Down
102 changes: 93 additions & 9 deletions integration/nbformat/nbformat.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,111 @@ package nbformat
import (
"encoding/json"
"io"
"strings"
)

// Notebook represents a Jupyter Notebook containing multiple cells.
type Notebook struct {
Cells []Cell `json:"cells"`
Metadata Metadata `json:"metadata"`
Nbformat int `json:"nbformat"`
NbformatMinor int `json:"nbformat_minor"`
Cells []Cell `json:"cells"`
}

// Metadata is the notebook-level "metadata" object. It groups the kernel
// specification and the language information of a Jupyter Notebook.
type Metadata struct {
	// KernelSpec is decoded from the "kernelspec" key.
	KernelSpec KernelSpec `json:"kernelspec"`
	// LanguageInfo is decoded from the "language_info" key.
	LanguageInfo LanguageInfo `json:"language_info"`
}

// KernelSpec is the "kernelspec" entry of the notebook metadata.
type KernelSpec struct {
	// Name is decoded from the "name" key.
	Name string `json:"name"`
	// DisplayName is decoded from the "display_name" key.
	DisplayName string `json:"display_name"`
}

// LanguageInfo is the "language_info" entry of the notebook metadata.
type LanguageInfo struct {
	// Name is decoded from the "name" key.
	Name string `json:"name"`
	// Version is decoded from the "version" key.
	Version string `json:"version"`
}

// Cell represents a single cell within a Jupyter Notebook.
type Cell struct {
	// CellType is the value of the "cell_type" key.
	CellType string `json:"cell_type"`
	// Source is the cell's text. In the JSON it may be a string or a list of
	// strings; UnmarshalJSON always normalizes it to a single string.
	Source string `json:"source"`
	// Metadata holds the cell's "metadata" object as a generic map.
	Metadata map[string]interface{} `json:"metadata"`
	// Outputs holds the entries of the "outputs" array, if present.
	Outputs []Output `json:"outputs,omitempty"`
}

// UnmarshalJSON implements json.Unmarshaler. Every field is decoded as usual,
// except that "source" is accepted either as a JSON string or as a JSON array
// of strings and is always stored as a single string.
//
// Array elements are concatenated without a separator: in the notebook format
// each line of a multi-line string already carries its own trailing newline,
// so inserting another one would double every line break.
//
// A missing or null "source" leaves c.Source untouched. Any other JSON type,
// or an array containing a non-string, non-null element, is returned as a
// decoding error instead of panicking or being silently dropped.
func (c *Cell) UnmarshalJSON(data []byte) error {
	// Alias has Cell's fields but none of its methods, so decoding into it
	// does not recurse back into this function.
	type Alias Cell

	aux := struct {
		// Source shadows Alias.Source so the raw JSON value can be inspected
		// before choosing how to decode it.
		Source json.RawMessage `json:"source"`
		*Alias
	}{
		Alias: (*Alias)(c),
	}

	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}

	switch {
	case len(aux.Source) == 0, string(aux.Source) == "null":
		// Key absent or explicitly null: nothing to normalize.
		return nil
	case aux.Source[0] == '[':
		var lines []string
		if err := json.Unmarshal(aux.Source, &lines); err != nil {
			return err
		}

		c.Source = strings.Join(lines, "")

		return nil
	default:
		// A plain string decodes directly; any other type yields the
		// standard encoding/json type error.
		return json.Unmarshal(aux.Source, &c.Source)
	}
}

// Output represents the output of a cell in a Jupyter Notebook.
type Output struct {
	// OutputType is the value of the "output_type" key.
	OutputType string `json:"output_type"`
	// Text is the output text. In the JSON it may be a string or a list of
	// strings; UnmarshalJSON always normalizes it to a single string.
	Text string `json:"text,omitempty"`
	// Data holds the "data" object as a generic map.
	Data map[string]interface{} `json:"data,omitempty"`
	// Metadata holds the "metadata" object as a generic map.
	Metadata map[string]interface{} `json:"metadata,omitempty"`
	// ErrorName is the value of the "ename" key.
	ErrorName string `json:"ename,omitempty"`
	// ErrorValue is the value of the "evalue" key.
	ErrorValue string `json:"evalue,omitempty"`
	// Traceback holds the entries of the "traceback" array.
	Traceback []string `json:"traceback,omitempty"`
}

// UnmarshalJSON implements json.Unmarshaler. Every field is decoded as usual,
// except that "text" is accepted either as a JSON string or as a JSON array
// of strings and is always stored as a single string.
//
// Array elements are concatenated without a separator: in the notebook format
// each line of a multi-line string already carries its own trailing newline,
// so inserting another one would double every line break.
//
// A missing or null "text" leaves o.Text untouched. Any other JSON type, or
// an array containing a non-string, non-null element, is returned as a
// decoding error instead of panicking or being silently dropped.
func (o *Output) UnmarshalJSON(data []byte) error {
	// Alias has Output's fields but none of its methods, so decoding into it
	// does not recurse back into this function.
	type Alias Output

	aux := struct {
		// Text shadows Alias.Text so the raw JSON value can be inspected
		// before choosing how to decode it.
		Text json.RawMessage `json:"text"`
		*Alias
	}{
		Alias: (*Alias)(o),
	}

	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}

	switch {
	case len(aux.Text) == 0, string(aux.Text) == "null":
		// Key absent or explicitly null: nothing to normalize.
		return nil
	case aux.Text[0] == '[':
		var lines []string
		if err := json.Unmarshal(aux.Text, &lines); err != nil {
			return err
		}

		o.Text = strings.Join(lines, "")

		return nil
	default:
		// A plain string decodes directly; any other type yields the
		// standard encoding/json type error.
		return json.Unmarshal(aux.Text, &o.Text)
	}
}

// ReadNBFormat reads and parses a Jupyter Notebook from the given io.Reader.
Expand Down

0 comments on commit 635f935

Please sign in to comment.