From cabe857287b30a950d6610e2f990f3072e6e6d52 Mon Sep 17 00:00:00 2001
From: Wey Gu <weyl.gu@gmail.com>
Date: Fri, 19 Jan 2024 17:49:15 +0800
Subject: [PATCH] prompt: default prompt for KG builder

Previously we assumed "name" is special: for an extracted node it
acts as the vertexID. For an edge, however, the ID in NebulaGraph is
a composite of 4 fields (src, dst, type, rank), so there is no
"name" field for edges in the extracted JSON.

In that case, a "name" field in the schema of an edge type is not
handled properly by some LLMs.

Also, the previous JSON format was not really an example, but half
example (nodes, edges) and half JSON type schema (xxx:string,
yyy:object).

This was not clear enough and cost the LLMs extra effort to interpret.

Now it's changed to a real example.
---
 server/api/studio/etc/ai-importer.yaml | 25 ++++++++++++++---------
 server/api/studio/etc/studio-api.yaml  | 28 +++++++++++++++++---------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/server/api/studio/etc/ai-importer.yaml b/server/api/studio/etc/ai-importer.yaml
index fc2a0b7d..b2e3a841 100644
--- a/server/api/studio/etc/ai-importer.yaml
+++ b/server/api/studio/etc/ai-importer.yaml
@@ -15,18 +15,25 @@ MaxBlockSize: 0 # max request block num
 GQLBatchSize: 100 # max gql batch size
 PromptTemplate: |
     As a knowledge graph AI importer, your task is to extract useful data from the following text:
-    ----text
+
+    ---
     {text}
-    ----
+    ---
 
-    the knowledge graph has following schema and node name must be a real :
-    ----graph schema
+    The knowledge graph should follow this schema (node name is mandatory):
+
+    ---
     {spaceSchema}
-    ----
+    ---
+
+    Please return the results in JSON format only, without any explanations or comments. The JSON should include nodes and edges with their properties, as shown below:
 
-    Return the results directly, without explain and comment. The results should be in the following JSON format:
+    ```json
     {
-      "nodes":[{ "name":string,"type":string,"props":object }],
-      "edges":[{ "src":string,"dst":string,"edgeType":string,"props":object }]
+      "nodes":[{ "name":"foo","type":"node_type_1","props":{"key_x":"85%"} }],
+      "edges":[{ "src":"foo","dst":"bar","edgeType":"edge_type_3","props":{"name":"is located in"} }]
     }
-    the result json is:
\ No newline at end of file
+    ```
+
+    Ensure the JSON is correctly formatted. Now, extract!
+    JSON:
diff --git a/server/api/studio/etc/studio-api.yaml b/server/api/studio/etc/studio-api.yaml
index e612ebdc..da29fd17 100644
--- a/server/api/studio/etc/studio-api.yaml
+++ b/server/api/studio/etc/studio-api.yaml
@@ -65,17 +65,27 @@ LLM:
   MaxBlockSize: 0
   PromptTemplate: |
     As a knowledge graph AI importer, your task is to extract useful data from the following text:
-    ```text
+
+    ---
     {text}
-    ```
-    the knowledge graph has following schema and node name must be a real :
-    ```graph-schema
+    ---
+
+    The knowledge graph should follow this schema (node name is mandatory):
+
+    ---
     {spaceSchema}
-    ```
+    ---
+
     {userPrompt}
-    Return the results directly, without explain and comment. The results should be in the following JSON format:
+
+    Please return the results in JSON format only, without any explanations or comments. The JSON should include nodes and edges with their properties, as shown below:
+
+    ```json
     {
-      "nodes":[{ "name":string,"type":string,"props":object }],
-      "edges":[{ "src":string,"dst":string,"edgeType":string,"props":object }]
+      "nodes":[{ "name":"foo","type":"node_type_1","props":{"key_x":"85%"} }],
+      "edges":[{ "src":"foo","dst":"bar","edgeType":"edge_type_3","props":{"name":"is located in"} }]
     }
-    the result json is:
\ No newline at end of file
+    ```
+
+    Ensure the JSON is correctly formatted. Now, extract!
+    JSON: