Skip to content

Commit

Permalink
Add surrounding context for Tanakh references
Browse files Browse the repository at this point in the history
  • Loading branch information
ronshapiro committed Nov 9, 2024
1 parent 4ecc2af commit 0e32b45
Show file tree
Hide file tree
Showing 61 changed files with 76,870 additions and 5,745 deletions.
36 changes: 36 additions & 0 deletions __tests__/array.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import {surroundingContext} from "../arrays";

// A context size of zero is invalid and must be rejected with a thrown error.
test("context: zero", () => {
  expect(() => surroundingContext(["a"], "a", 0)).toThrow();
});

// A negative context size is invalid and must be rejected with a thrown error.
test("context: negative", () => {
  expect(() => surroundingContext(["a"], "a", -1)).toThrow();
});

// With a one-element array, every positive context size yields just that element.
test("single", () => {
  for (const contextSize of [1, 10]) {
    expect(surroundingContext(["a"], "a", contextSize)).toEqual(["a"]);
  }
});


// When the item is the first element, only trailing neighbors can be included.
test("first", () => {
  const cases: [number, number[]][] = [
    [1, [10, 11]],
    [2, [10, 11, 12]],
    [3, [10, 11, 12]],
  ];
  for (const [contextSize, expected] of cases) {
    expect(surroundingContext([10, 11, 12], 10, contextSize)).toEqual(expected);
  }
});


// When the item is in the middle of a three-element array, any positive context covers it all.
test("middle", () => {
  const everything = [10, 11, 12];
  for (const contextSize of [1, 2, 3]) {
    expect(surroundingContext([10, 11, 12], 11, contextSize)).toEqual(everything);
  }
});

// When the item is the last element, only leading neighbors can be included.
test("last", () => {
  const cases: [number, number[]][] = [
    [1, [11, 12]],
    [2, [10, 11, 12]],
    [3, [10, 11, 12]],
  ];
  for (const [contextSize, expected] of cases) {
    expect(surroundingContext([10, 11, 12], 12, contextSize)).toEqual(expected);
  }
});
30 changes: 23 additions & 7 deletions api_request_handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
Section,
ApiComment,
} from "./apiTypes";
import {surroundingContext} from "./arrays";
import {Book, books, internalLinkableRef} from "./books";
import {ALL_COMMENTARIES, CommentaryType} from "./commentaries";
import {readUtf8} from "./files";
Expand All @@ -29,7 +30,8 @@ import {
segmentCount,
} from "./precomputed";
import {dedupeEnglishRabbiNames, dedupeHebrewRabbiNames, topicJson} from "./precomputed/topics";
import {llmGeneratedTopic} from "./precomputed/tanakh_context_cache";
import {llmGeneratedTopic, LlmGeneratedTopic} from "./precomputed/tanakh_context_cache";
import {getTanakhPassage} from "./precomputed/tanakh_passages";
import {expandRef} from "./ref_expander";
import {splitOnBookName} from "./refs";
import {RequestMaker} from "./request_makers";
Expand Down Expand Up @@ -128,6 +130,10 @@ function stripPossiblePrefix(text: string, prefix: string): string {

type SplitType = [string, string][];

/**
 * Looks up the LLM-generated topic for a link.
 *
 * The lookup is attempted with `link.ref` first; only when that yields `null` or `undefined` is
 * `link.sourceRef` tried instead.
 */
function llmGeneratedTopicForLink(link: sefaria.TextLink): LlmGeneratedTopic | undefined {
  const topicForRef = llmGeneratedTopic(link.ref);
  if (topicForRef !== null && topicForRef !== undefined) {
    return topicForRef;
  }
  return llmGeneratedTopic(link.sourceRef);
}

/** A single comment on a text. */
class Comment {
duplicateRefs: string[] = [];
Expand Down Expand Up @@ -202,7 +208,7 @@ class Comment {
}

if (englishName === "Verses") {
const llmResult = llmGeneratedTopic(ref);
const llmResult = llmGeneratedTopicForLink(link);
if (llmResult === undefined) {
// TODO: this happens for spanned refs!
logger.error("No result for", ref);
Expand Down Expand Up @@ -864,10 +870,20 @@ export abstract class AbstractApiRequestHandler {
}

protected maybeRewriteLinkRef(commentaryType: CommentaryType, link: sefaria.TextLink): void {
if (!link.ref.includes(":")
&& books.byCanonicalName[link.collectiveTitle?.en ?? ""]?.isBibleBook()) {
link.expandedRefsAfterRewriting = (
_.range(1, segmentCount(link.ref)!).map(x => `${link.ref}:${x}`));
if (books.byCanonicalName[link.collectiveTitle?.en ?? ""]?.isBibleBook()) {
if (link.ref.includes(":")) {
const expandedRefs = expandRef(link.ref)!;
const passage = getTanakhPassage(expandedRefs[0]);
if (passage) {
link.originalRefsBeforeRewriting = expandedRefs;
const context = surroundingContext(passage, expandedRefs[0], 10);
link.expandedRefsAfterRewriting = context;
link.ref = `${context[0]}-${splitOnBookName(context.at(-1)!)[1]}`;
}
} else {
link.expandedRefsAfterRewriting = (
_.range(1, segmentCount(link.ref)!).map(x => `${link.ref}:${x}`));
}
}
if (this.isMesoratHashasTalmudRef(commentaryType, link)) {
const newRef = getSugyaSpanningRef(link.collectiveTitle?.en ?? "", link.ref);
Expand Down Expand Up @@ -1119,7 +1135,7 @@ export abstract class AbstractApiRequestHandler {
const {comment, footnotes} = FootnotesExtractor.extract(linkResponse);
commentary.addComment(Comment.create(link, comment, commentaryType.englishName, this.logger));
if (commentaryType.englishName === "Verses") {
const context = llmGeneratedTopic(linkRef)?.surroundingContext;
const context = llmGeneratedTopicForLink(link)?.surroundingContext;
if (context) {
nestedCommentary.addComment(new Comment(
"Context",
Expand Down
6 changes: 6 additions & 0 deletions arrays.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Returns the window of `array` centered on `item`, extended by up to `context` elements on each
 * side. The window is clipped at both ends of the array, so fewer than `2 * context + 1` elements
 * are returned when `item` is near an edge.
 *
 * `item` is located with `Array.prototype.indexOf`, i.e. by strict equality, and the first match
 * is used.
 *
 * @param array the values to take the window from; it is not modified
 * @param item the element the window is centered on
 * @param context how many neighbors to include on each side of `item`; must be positive
 * @returns a new array containing `item` and its surrounding neighbors, in original order
 * @throws Error if `context` is not positive, or if `item` does not occur in `array`
 */
export function surroundingContext<T>(array: readonly T[], item: T, context: number): T[] {
  if (context <= 0) throw new Error(`Invalid context: ${context} must be positive`);
  const index = array.indexOf(item);
  // Without this guard, an index of -1 would be clamped to 0 below and the function would
  // silently return the first `context` elements, which are unrelated to `item`.
  if (index === -1) throw new Error(`Item not found in array: ${String(item)}`);
  const start = Math.max(index - context, 0);
  // slice() clamps an end index past the array length, so no upper bound check is needed.
  return array.slice(start, index + context + 1);
}
2 changes: 1 addition & 1 deletion precomputed/tanakh_context_cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interface SurroundingContext {
hebrew: string;
}

interface LlmGeneratedTopic {
export interface LlmGeneratedTopic {
english: string;
hebrew: string;
surroundingContext: SurroundingContext;
Expand Down
Loading

0 comments on commit 0e32b45

Please sign in to comment.