Skip to content

Commit

Permalink
feat: Implement Telegram HTML support
Browse files Browse the repository at this point in the history
  • Loading branch information
n4ze3m committed Aug 24, 2024
1 parent 8eb1c66 commit 664b0e7
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 9 deletions.
2 changes: 1 addition & 1 deletion app/ui/src/components/Bot/Playground/Message.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ export const PlaygroundMessage = (props: Props) => {
<Markdown message={props.message} />
</div>

{props.isBot && (
{props.isBot && props?.sources && props?.sources?.length > 0 && (
<Collapse
className="mt-6"
ghost
Expand Down
24 changes: 18 additions & 6 deletions server/src/integration/telegram.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { convertTextToAudio } from "./handlers/utils/audio-to-text";
import { FileFlavor, hydrateFiles } from "@grammyjs/files";
import * as fs from "fs/promises";
import { convertOggToWave } from "../utils/ffmpeg";
import { telegramFormat } from "../utils/telegram-format";
type DialoqBaseContext = FileFlavor<Context>;
export default class TelegramBot {
static get clients() {
Expand Down Expand Up @@ -73,9 +74,14 @@ export default class TelegramBot {
user_id
);

return await ctx.reply(message, {
parse_mode: "MarkdownV2",
});
if (process.env.DB_TELEGEAM_PARSE_MODE === "normal") {
return await ctx.reply(message);
}

return await ctx.reply(telegramFormat(message),
{
parse_mode: "HTML",
});
});

bot.on("message:voice", async (ctx) => {
Expand All @@ -102,9 +108,15 @@ export default class TelegramBot {
user_id
);

return await ctx.reply(message, {
parse_mode: "MarkdownV2",
});

if (process.env.DB_TELEGEAM_PARSE_MODE === "normal") {
return await ctx.reply(message);
}

return await ctx.reply(telegramFormat(message),
{
parse_mode: "HTML",
});
} catch (error) {
console.log(error);
return await ctx.reply("Opps! Something went wrong");
Expand Down
6 changes: 4 additions & 2 deletions server/src/internet/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ const searchProviders = {

export const searchInternet = async (embedding: Embeddings, { query }: { query: string }) => {

if(process.env.DISABLE_INTERNET_SEARCH == "true") {
if (process.env.DISABLE_INTERNET_SEARCH == "true") {
return [];
}

Expand All @@ -127,7 +127,9 @@ export const searchInternet = async (embedding: Embeddings, { query }: { query:
}
const datat = await searchProvider(query);

const results = datat.slice(0, TOTAL_RESULTS_LIMIT);
const data = datat.filter((doc) => doc?.content.length > 0);

const results = data.slice(0, TOTAL_RESULTS_LIMIT)

const [docEmbeddings, queryEmbedding] = await Promise.all([
embedding.embedDocuments(results.map((doc) => doc.content)),
Expand Down
131 changes: 131 additions & 0 deletions server/src/utils/telegram-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// This code is a TypeScript port of the original Python code from the repo: https://github.com/Latand/formatter-chatgpt-telegram

/**
 * Escapes the HTML-reserved characters `&`, `<` and `>` as entities.
 *
 * The ampersand is replaced first so that the `&` introduced by the
 * `&lt;` / `&gt;` entities is not escaped a second time.
 *
 * @param text - Raw text to escape.
 * @returns The text with `&`, `<` and `>` replaced by `&amp;`, `&lt;`, `&gt;`.
 */
function convertHtmlChars(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Rewrites every `mdTag…mdTag` pair in `outText` as `<htmlTag>…</htmlTag>`.
 *
 * A pair is only converted when the opening delimiter is not directly
 * preceded by a word character and the closing delimiter is not directly
 * followed by one. The `s` flag lets the wrapped content span several lines.
 *
 * @param outText - Text to transform.
 * @param mdTag - Markdown delimiter (used literally, regex-escaped here).
 * @param htmlTag - Name of the HTML tag that replaces the delimiter pair.
 * @returns The text with matching delimiter pairs replaced by HTML tags.
 */
function splitByTag(outText: string, mdTag: string, htmlTag: string): string {
  const delimiter = escapeRegExp(mdTag);
  const patternSource = "(?<!\\w)" + delimiter + "(.*?)" + delimiter + "(?!\\w)";
  const replacement = "<" + htmlTag + ">$1</" + htmlTag + ">";
  return outText.replace(new RegExp(patternSource, "gs"), replacement);
}

/**
 * Backslash-escapes every regular-expression metacharacter in the input so
 * the result can be embedded in a `RegExp` source and match literally.
 *
 * @param string - Literal text to escape.
 * @returns The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(string: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  return string.replace(metaCharacters, "\\$&");
}

/**
 * Appends missing closing backtick delimiters to the text.
 *
 * First, if the number of triple-backtick fences is odd, a closing fence is
 * appended. Then, if the total number of single backticks (counted after the
 * first step) is odd, one more backtick is appended.
 *
 * @param text - Text that may contain unterminated code delimiters.
 * @returns The text with the closing delimiters appended where needed.
 */
function ensureClosingDelimiters(text: string): string {
  const countMatches = (haystack: string, pattern: RegExp): number => {
    const found = haystack.match(pattern);
    return found === null ? 0 : found.length;
  };

  let balanced = text;
  if (countMatches(balanced, /```/g) % 2 === 1) {
    balanced = balanced + "```";
  }
  // Counted on the possibly-extended text, exactly like the fence check above.
  if (countMatches(balanced, /`/g) % 2 === 1) {
    balanced = balanced + "`";
  }
  return balanced;
}

/**
 * Pulls fenced code blocks out of the text and swaps each one for a
 * placeholder token, returning the HTML for every block separately.
 *
 * Unterminated delimiters are closed first via `ensureClosingDelimiters`.
 * Each fence becomes `CODEBLOCKPLACEHOLDER<n>` (n counting from 0 in match
 * order) and maps to `<pre><code>…</code></pre>`, with a
 * `class="language-…"` attribute when the fence names a language.
 *
 * @param text - Text that may contain fenced code blocks.
 * @returns A tuple of the text with placeholders and the placeholder → HTML map.
 */
function extractAndConvertCodeBlocks(text: string): [string, Record<string, string>] {
  const source = ensureClosingDelimiters(text);
  const codeBlocks: Record<string, string> = {};
  const fencePattern = /```(\w*)?(\n)?(.*?)```/gs;

  let stripped = source;
  let blockIndex = 0;

  // Matches are searched in the untouched source; replacements go into `stripped`.
  for (
    let found = fencePattern.exec(source);
    found !== null;
    found = fencePattern.exec(source)
  ) {
    const [fence, language, , codeContent] = found;
    const placeholder = `CODEBLOCKPLACEHOLDER${blockIndex}`;
    blockIndex += 1;

    codeBlocks[placeholder] = language
      ? `<pre><code class="language-${language}">${codeContent}</code></pre>`
      : `<pre><code>${codeContent}</code></pre>`;

    // String pattern: only the first remaining occurrence of this fence is replaced.
    stripped = stripped.replace(fence, placeholder);
  }

  return [stripped, codeBlocks];
}

/**
 * Puts the converted HTML code blocks back in place of their placeholders.
 *
 * For each entry of `codeBlocks`, the first occurrence of the placeholder in
 * `text` is replaced by the corresponding HTML.
 *
 * @param text - Text containing placeholder tokens.
 * @param codeBlocks - Map from placeholder token to the HTML to insert.
 * @returns The text with the placeholders replaced by their HTML.
 */
function reinsertCodeBlocks(text: string, codeBlocks: Record<string, string>): string {
  for (const [placeholder, htmlCodeBlock] of Object.entries(codeBlocks)) {
    // A replacer function is used instead of a replacement string so that
    // "$$", "$&", "$`" and "$'" inside the code are inserted verbatim rather
    // than being interpreted as special replacement patterns.
    text = text.replace(placeholder, () => htmlCodeBlock);
  }
  return text;
}

/**
 * Merges each run of consecutive lines starting with `>` into a single
 * `<blockquote>…</blockquote>` element.
 *
 * The leading `>` is dropped and each quoted line is trimmed; the lines of
 * one run are joined with newlines inside the element. All other lines are
 * passed through unchanged.
 *
 * @param text - Text that may contain Markdown blockquote lines.
 * @returns The text with blockquote runs wrapped in `<blockquote>` tags.
 */
function combineBlockquotes(text: string): string {
  const merged: string[] = [];
  let quoted: string[] = [];

  // Emits the pending quote run, if any, as one blockquote element.
  const flushQuote = (): void => {
    if (quoted.length === 0) {
      return;
    }
    merged.push(`<blockquote>${quoted.join("\n")}</blockquote>`);
    quoted = [];
  };

  text.split("\n").forEach((line) => {
    if (line.startsWith(">")) {
      quoted.push(line.slice(1).trim());
      return;
    }
    flushQuote();
    merged.push(line);
  });

  // A quote run that reaches the end of the text still has to be emitted.
  flushQuote();

  return merged.join("\n");
}

/**
 * Turns entity-escaped blockquote tags back into real tags.
 *
 * @param output - Text in which `<blockquote>` / `</blockquote>` appear as
 *   `&lt;blockquote&gt;` / `&lt;/blockquote&gt;`.
 * @returns The text with those two escaped tags restored.
 */
function removeBlockquoteEscaping(output: string): string {
  const withOpeningTags = output.replace(/&lt;blockquote&gt;/g, "<blockquote>");
  return withOpeningTags.replace(/&lt;\/blockquote&gt;/g, "</blockquote>");
}

/**
 * Converts Markdown-style text into HTML markup intended for sending with
 * Telegram's `parse_mode: "HTML"`.
 *
 * Processing order:
 *  1. Runs of `>` lines are merged into `<blockquote>` elements.
 *  2. `&`, `<` and `>` are entity-escaped (this also escapes the blockquote
 *     tags from step 1; they are restored in the last step).
 *  3. Fenced code blocks are replaced by placeholders so the inline rules
 *     below do not touch their content.
 *  4. Inline code, bold/italic/underline/strikethrough, links, headings and
 *     unordered list bullets are converted.
 *  5. Code blocks are reinserted and the blockquote tags are unescaped.
 *
 * @param text - Markdown-style source text.
 * @returns The converted HTML string.
 */
export function telegramFormat(text: string): string {
  text = combineBlockquotes(text);
  text = convertHtmlChars(text);

  const [stripped, codeBlocks] = extractAndConvertCodeBlocks(text);
  // No further "<" / ">" escaping is needed here: convertHtmlChars above has
  // already replaced every occurrence.
  let output = stripped;

  // Inline code.
  output = output.replace(/`(.*?)`/g, "<code>$1</code>");

  // Combined bold+italic / underline+italic must run before the single-tag rules.
  output = output.replace(/\*\*\*(.*?)\*\*\*/g, "<b><i>$1</i></b>");
  output = output.replace(/\_\_\_(.*?)\_\_\_/g, "<u><i>$1</i></u>");

  // Longer delimiters first so "**" is not consumed as two "*".
  output = splitByTag(output, "**", "b");
  output = splitByTag(output, "__", "u");
  output = splitByTag(output, "_", "i");
  output = splitByTag(output, "*", "i");
  output = splitByTag(output, "~~", "s");

  // Strip bracketed markers of the form 【…】 (U+3010 … U+3011), as the
  // upstream Python formatter does. The previous pattern /[^]+/ matched every
  // character in JavaScript and therefore erased the whole message.
  output = output.replace(/\u3010[^\u3011]+\u3011/g, "");

  // Markdown links and images: [label](url) / ![label](url).
  // Single backslashes are required in a regex literal; the doubled ones
  // used before never matched a real link.
  output = output.replace(/!?\[(.*?)\]\((.*?)\)/g, '<a href="$2">$1</a>');

  // Headings become bold lines.
  output = output.replace(/^\s*#+ (.+)/gm, "<b>$1</b>");

  // Unordered list items keep their indentation and get a bullet character.
  output = output.replace(/^(\s*)[\-\*] (.+)/gm, "$1• $2");

  output = reinsertCodeBlocks(output, codeBlocks);
  output = removeBlockquoteEscaping(output);

  return output;
}

0 comments on commit 664b0e7

Please sign in to comment.