Generate en docs

milvus-io · Oct 28, 2024 · aae217b · aae217b
1 parent 075fca4
commit aae217b
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 63 deletions.
diff --git a/localization/v2.4.x/site/en/integrations/integrate_with_sentencetransformers.json b/localization/v2.4.x/site/en/integrations/integrate_with_sentencetransformers.json
@@ -1 +1 @@
-{"codeList":["pip install pymilvus sentence-transformers datasets tqdm\n","from datasets import load_dataset\nfrom pymilvus import MilvusClient, connections\nfrom pymilvus import FieldSchema, CollectionSchema, DataType, Collection\nfrom sentence_transformers import SentenceTransformer\nfrom tqdm import tqdm\n","embedding_dim = 384\ncollection_name = \"movie_embeddings\"\n","ds = load_dataset(\"vishnupriyavr/wiki-movie-plots-with-summaries\", split=\"train\")\nprint(ds)\n","connections.connect(uri=\"./sentence_transformers_example.db\")\n","fields = [\n    FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True),\n    FieldSchema(name='title', dtype=DataType.VARCHAR, max_length=256),\n    FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, dim=embedding_dim)\n]\n\nschema = CollectionSchema(fields=fields, enable_dynamic_field=False)\ncollection = Collection(name=collection_name, schema=schema)\n","params = {\n    'index_type':\"FLAT\",\n    'metric_type': \"IP\"\n    }\n\ncollection.create_index(\n    'embedding',\n    params\n)\n","model = SentenceTransformer(\"all-MiniLM-L12-v2\")\n","for batch in tqdm(ds.batch(batch_size=512)):\n    embeddings = model.encode(batch['PlotSummary'])\n    data = [{\"title\": title, \"embedding\": embedding} for title, embedding in zip(batch['Title'], embeddings)]\n    res = collection.insert(data=data)\n","collection.flush()\nprint(collection.num_entities)\n","queries = [\n    'A shark terrorizes an LA beach.',\n    'An archaeologist searches for ancient artifacts while fighting Nazis.',\n    'Teenagers in detention learn about themselves.',\n    'A teenager fakes illness to get off school and have adventures with two friends.',\n    'A young couple with a kid look after a hotel during winter and the husband goes insane.',\n    'Four turtles fight bad guys.'\n    ]\n\n# Search the database based on input text\ndef embed_search(data):\n    embeds = model.encode(data) \n    return [x for x in embeds]\n\nsearch_data = embed_search(queries)\n\nres = collection.search(\n    data=search_data,\n    anns_field=\"embedding\",\n    param={},\n    limit=3,\n    output_fields=['title']\n)\n\nfor idx, hits in enumerate(res):\n    print('Title:', queries[idx])\n    # print('Search Time:', end-start)\n    print('Results:')\n    for hit in hits:\n        print( hit.entity.get('title'), '(', round(hit.distance, 2), ')')\n    print()\n","Title: An archaeologist searches for ancient artifacts while fighting Nazis.\nResults:\n\"Pimpernel\" Smith ( 0.48 )\nPhantom of Chinatown ( 0.42 )\nCounterblast ( 0.41 )\n\nTitle: Teenagers in detention learn about themselves.\nResults:\nThe Breakfast Club ( 0.54 )\nUp the Academy ( 0.46 )\nFame ( 0.43 )\n\nTitle: A teenager fakes illness to get off school and have adventures with two friends.\nResults:\nFerris Bueller's Day Off ( 0.48 )\nFever Lake ( 0.47 )\nA Walk to Remember ( 0.45 )\n\nTitle: A young couple with a kid look after a hotel during winter and the husband goes insane.\nResults:\nAlways a Bride ( 0.54 )\nFast and Loose ( 0.49 )\nThe Shining ( 0.48 )\n\nTitle: Four turtles fight bad guys.\nResults:\nTMNT 2: Out of the Shadows ( 0.49 )\nTeenage Mutant Ninja Turtles II: The Secret of the Ooze ( 0.47 )\nGamera: Super Monster ( 0.43 )\n"],"headingContent":"Movie Search Using Milvus and SentenceTransformers","anchorList":[{"label":"Movie Search Using Milvus and SentenceTransformers","href":"Movie-Search-Using-Milvus-and-SentenceTransformers","type":1,"isActive":false},{"label":"Required Libraries","href":"Required-Libraries","type":2,"isActive":false},{"label":"Downloading and Opening the Dataset","href":"Downloading-and-Opening-the-Dataset","type":2,"isActive":false},{"label":"Connecting to the Database","href":"Connecting-to-the-Database","type":2,"isActive":false},{"label":"Inserting the Data","href":"Inserting-the-Data","type":2,"isActive":false},{"label":"Performing the Search","href":"Performing-the-Search","type":2,"isActive":false}]}
+{"codeList":["pip install pymilvus sentence-transformers datasets tqdm\n","from datasets import load_dataset\nfrom pymilvus import MilvusClient\nfrom pymilvus import FieldSchema, CollectionSchema, DataType\nfrom sentence_transformers import SentenceTransformer\nfrom tqdm import tqdm\n","embedding_dim = 384\ncollection_name = \"movie_embeddings\"\n","ds = load_dataset(\"vishnupriyavr/wiki-movie-plots-with-summaries\", split=\"train\")\nprint(ds)\n","client = MilvusClient(uri=\"./sentence_transformers_example.db\")\n","fields = [\n    FieldSchema(name=\"id\", dtype=DataType.INT64, is_primary=True, auto_id=True),\n    FieldSchema(name=\"title\", dtype=DataType.VARCHAR, max_length=256),\n    FieldSchema(name=\"embedding\", dtype=DataType.FLOAT_VECTOR, dim=embedding_dim),\n    FieldSchema(name=\"year\", dtype=DataType.INT64),\n    FieldSchema(name=\"origin\", dtype=DataType.VARCHAR, max_length=64),\n]\n\nschema = CollectionSchema(fields=fields, enable_dynamic_field=False)\nclient.create_collection(collection_name=collection_name, schema=schema)\n","index_params = client.prepare_index_params()\nindex_params.add_index(field_name=\"embedding\", index_type=\"FLAT\", metric_type=\"IP\")\nclient.create_index(collection_name, index_params)\n","model = SentenceTransformer(\"all-MiniLM-L12-v2\")\n","for batch in tqdm(ds.batch(batch_size=512)):\n    embeddings = model.encode(batch[\"PlotSummary\"])\n    data = [\n        {\"title\": title, \"embedding\": embedding, \"year\": year, \"origin\": origin}\n        for title, embedding, year, origin in zip(\n            batch[\"Title\"], embeddings, batch[\"Release Year\"], batch[\"Origin/Ethnicity\"]\n        )\n    ]\n    res = client.insert(collection_name=collection_name, data=data)\n","queries = [\n    'A shark terrorizes an LA beach.',\n    'An archaeologist searches for ancient artifacts while fighting Nazis.',\n    'Teenagers in detention learn about themselves.',\n    'A teenager fakes illness to get off school and have adventures with two friends.',\n    'A young couple with a kid look after a hotel during winter and the husband goes insane.',\n    'Four turtles fight bad guys.'\n    ]\n\n# Search the database based on input text\ndef embed_query(data):\n    vectors = model.encode(data)\n    return [x for x in vectors]\n\n\nquery_vectors = embed_query(queries)\n\nres = client.search(\n    collection_name=collection_name,\n    data=query_vectors,\n    filter='origin == \"American\" and year > 1945 and year < 2000',\n    anns_field=\"embedding\",\n    limit=3,\n    output_fields=[\"title\"],\n)\n\nfor idx, hits in enumerate(res):\n    print(\"Query:\", queries[idx])\n    print(\"Results:\")\n    for hit in hits:\n        print(hit[\"entity\"].get(\"title\"), \"(\", round(hit[\"distance\"], 2), \")\")\n    print()\n","Query: An archaeologist searches for ancient artifacts while fighting Nazis.\nResults:\nLove Slaves of the Amazons ( 0.4 )\nA Time to Love and a Time to Die ( 0.39 )\nThe Fifth Element ( 0.39 )\n\nQuery: Teenagers in detention learn about themselves.\nResults:\nThe Breakfast Club ( 0.54 )\nUp the Academy ( 0.46 )\nFame ( 0.43 )\n\nQuery: A teenager fakes illness to get off school and have adventures with two friends.\nResults:\nFerris Bueller's Day Off ( 0.48 )\nFever Lake ( 0.47 )\nLosin' It ( 0.39 )\n\nQuery: A young couple with a kid look after a hotel during winter and the husband goes insane.\nResults:\nThe Shining ( 0.48 )\nThe Four Seasons ( 0.42 )\nHighball ( 0.41 )\n\nQuery: Four turtles fight bad guys.\nResults:\nTeenage Mutant Ninja Turtles II: The Secret of the Ooze ( 0.47 )\nDevil May Hare ( 0.43 )\nAttack of the Giant Leeches ( 0.42 )\n"],"headingContent":"Movie Search Using Milvus and SentenceTransformers","anchorList":[{"label":"Movie Search Using Milvus and SentenceTransformers","href":"Movie-Search-Using-Milvus-and-SentenceTransformers","type":1,"isActive":false},{"label":"Required Libraries","href":"Required-Libraries","type":2,"isActive":false},{"label":"Downloading and Opening the Dataset","href":"Downloading-and-Opening-the-Dataset","type":2,"isActive":false},{"label":"Connecting to the Database","href":"Connecting-to-the-Database","type":2,"isActive":false},{"label":"Inserting the Data","href":"Inserting-the-Data","type":2,"isActive":false},{"label":"Performing the Search","href":"Performing-the-Search","type":2,"isActive":false}]}
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"codeList":["pip install pymilvus sentence-transformers datasets tqdm\n","from datasets import load_dataset\nfrom pymilvus import MilvusClient, connections\nfrom pymilvus import FieldSchema, CollectionSchema, DataType, Collection\nfrom sentence_transformers import SentenceTransformer\nfrom tqdm import tqdm\n","embedding_dim = 384\ncollection_name = \"movie_embeddings\"\n","ds = load_dataset(\"vishnupriyavr/wiki-movie-plots-with-summaries\", split=\"train\")\nprint(ds)\n","connections.connect(uri=\"./sentence_transformers_example.db\")\n","fields = [\n FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True),\n FieldSchema(name='title', dtype=DataType.VARCHAR, max_length=256),\n FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, dim=embedding_dim)\n]\n\nschema = CollectionSchema(fields=fields, enable_dynamic_field=False)\ncollection = Collection(name=collection_name, schema=schema)\n","params = {\n 'index_type':\"FLAT\",\n 'metric_type': \"IP\"\n }\n\ncollection.create_index(\n 'embedding',\n params\n)\n","model = SentenceTransformer(\"all-MiniLM-L12-v2\")\n","for batch in tqdm(ds.batch(batch_size=512)):\n embeddings = model.encode(batch['PlotSummary'])\n data = [{\"title\": title, \"embedding\": embedding} for title, embedding in zip(batch['Title'], embeddings)]\n res = collection.insert(data=data)\n","collection.flush()\nprint(collection.num_entities)\n","queries = [\n 'A shark terrorizes an LA beach.',\n 'An archaeologist searches for ancient artifacts while fighting Nazis.',\n 'Teenagers in detention learn about themselves.',\n 'A teenager fakes illness to get off school and have adventures with two friends.',\n 'A young couple with a kid look after a hotel during winter and the husband goes insane.',\n 'Four turtles fight bad guys.'\n ]\n\n# Search the database based on input text\ndef embed_search(data):\n embeds = model.encode(data) \n return [x for x in embeds]\n\nsearch_data = embed_search(queries)\n\nres = collection.search(\n data=search_data,\n anns_field=\"embedding\",\n param={},\n limit=3,\n output_fields=['title']\n)\n\nfor idx, hits in enumerate(res):\n print('Title:', queries[idx])\n # print('Search Time:', end-start)\n print('Results:')\n for hit in hits:\n print( hit.entity.get('title'), '(', round(hit.distance, 2), ')')\n print()\n","Title: An archaeologist searches for ancient artifacts while fighting Nazis.\nResults:\n\"Pimpernel\" Smith ( 0.48 )\nPhantom of Chinatown ( 0.42 )\nCounterblast ( 0.41 )\n\nTitle: Teenagers in detention learn about themselves.\nResults:\nThe Breakfast Club ( 0.54 )\nUp the Academy ( 0.46 )\nFame ( 0.43 )\n\nTitle: A teenager fakes illness to get off school and have adventures with two friends.\nResults:\nFerris Bueller's Day Off ( 0.48 )\nFever Lake ( 0.47 )\nA Walk to Remember ( 0.45 )\n\nTitle: A young couple with a kid look after a hotel during winter and the husband goes insane.\nResults:\nAlways a Bride ( 0.54 )\nFast and Loose ( 0.49 )\nThe Shining ( 0.48 )\n\nTitle: Four turtles fight bad guys.\nResults:\nTMNT 2: Out of the Shadows ( 0.49 )\nTeenage Mutant Ninja Turtles II: The Secret of the Ooze ( 0.47 )\nGamera: Super Monster ( 0.43 )\n"],"headingContent":"Movie Search Using Milvus and SentenceTransformers","anchorList":[{"label":"Movie Search Using Milvus and SentenceTransformers","href":"Movie-Search-Using-Milvus-and-SentenceTransformers","type":1,"isActive":false},{"label":"Required Libraries","href":"Required-Libraries","type":2,"isActive":false},{"label":"Downloading and Opening the Dataset","href":"Downloading-and-Opening-the-Dataset","type":2,"isActive":false},{"label":"Connecting to the Database","href":"Connecting-to-the-Database","type":2,"isActive":false},{"label":"Inserting the Data","href":"Inserting-the-Data","type":2,"isActive":false},{"label":"Performing the Search","href":"Performing-the-Search","type":2,"isActive":false}]}
		{"codeList":["pip install pymilvus sentence-transformers datasets tqdm\n","from datasets import load_dataset\nfrom pymilvus import MilvusClient\nfrom pymilvus import FieldSchema, CollectionSchema, DataType\nfrom sentence_transformers import SentenceTransformer\nfrom tqdm import tqdm\n","embedding_dim = 384\ncollection_name = \"movie_embeddings\"\n","ds = load_dataset(\"vishnupriyavr/wiki-movie-plots-with-summaries\", split=\"train\")\nprint(ds)\n","client = MilvusClient(uri=\"./sentence_transformers_example.db\")\n","fields = [\n FieldSchema(name=\"id\", dtype=DataType.INT64, is_primary=True, auto_id=True),\n FieldSchema(name=\"title\", dtype=DataType.VARCHAR, max_length=256),\n FieldSchema(name=\"embedding\", dtype=DataType.FLOAT_VECTOR, dim=embedding_dim),\n FieldSchema(name=\"year\", dtype=DataType.INT64),\n FieldSchema(name=\"origin\", dtype=DataType.VARCHAR, max_length=64),\n]\n\nschema = CollectionSchema(fields=fields, enable_dynamic_field=False)\nclient.create_collection(collection_name=collection_name, schema=schema)\n","index_params = client.prepare_index_params()\nindex_params.add_index(field_name=\"embedding\", index_type=\"FLAT\", metric_type=\"IP\")\nclient.create_index(collection_name, index_params)\n","model = SentenceTransformer(\"all-MiniLM-L12-v2\")\n","for batch in tqdm(ds.batch(batch_size=512)):\n embeddings = model.encode(batch[\"PlotSummary\"])\n data = [\n {\"title\": title, \"embedding\": embedding, \"year\": year, \"origin\": origin}\n for title, embedding, year, origin in zip(\n batch[\"Title\"], embeddings, batch[\"Release Year\"], batch[\"Origin/Ethnicity\"]\n )\n ]\n res = client.insert(collection_name=collection_name, data=data)\n","queries = [\n 'A shark terrorizes an LA beach.',\n 'An archaeologist searches for ancient artifacts while fighting Nazis.',\n 'Teenagers in detention learn about themselves.',\n 'A teenager fakes illness to get off school and have adventures with two friends.',\n 'A young couple with a kid look after a hotel during winter and the husband goes insane.',\n 'Four turtles fight bad guys.'\n ]\n\n# Search the database based on input text\ndef embed_query(data):\n vectors = model.encode(data)\n return [x for x in vectors]\n\n\nquery_vectors = embed_query(queries)\n\nres = client.search(\n collection_name=collection_name,\n data=query_vectors,\n filter='origin == \"American\" and year > 1945 and year < 2000',\n anns_field=\"embedding\",\n limit=3,\n output_fields=[\"title\"],\n)\n\nfor idx, hits in enumerate(res):\n print(\"Query:\", queries[idx])\n print(\"Results:\")\n for hit in hits:\n print(hit[\"entity\"].get(\"title\"), \"(\", round(hit[\"distance\"], 2), \")\")\n print()\n","Query: An archaeologist searches for ancient artifacts while fighting Nazis.\nResults:\nLove Slaves of the Amazons ( 0.4 )\nA Time to Love and a Time to Die ( 0.39 )\nThe Fifth Element ( 0.39 )\n\nQuery: Teenagers in detention learn about themselves.\nResults:\nThe Breakfast Club ( 0.54 )\nUp the Academy ( 0.46 )\nFame ( 0.43 )\n\nQuery: A teenager fakes illness to get off school and have adventures with two friends.\nResults:\nFerris Bueller's Day Off ( 0.48 )\nFever Lake ( 0.47 )\nLosin' It ( 0.39 )\n\nQuery: A young couple with a kid look after a hotel during winter and the husband goes insane.\nResults:\nThe Shining ( 0.48 )\nThe Four Seasons ( 0.42 )\nHighball ( 0.41 )\n\nQuery: Four turtles fight bad guys.\nResults:\nTeenage Mutant Ninja Turtles II: The Secret of the Ooze ( 0.47 )\nDevil May Hare ( 0.43 )\nAttack of the Giant Leeches ( 0.42 )\n"],"headingContent":"Movie Search Using Milvus and SentenceTransformers","anchorList":[{"label":"Movie Search Using Milvus and SentenceTransformers","href":"Movie-Search-Using-Milvus-and-SentenceTransformers","type":1,"isActive":false},{"label":"Required Libraries","href":"Required-Libraries","type":2,"isActive":false},{"label":"Downloading and Opening the Dataset","href":"Downloading-and-Opening-the-Dataset","type":2,"isActive":false},{"label":"Connecting to the Database","href":"Connecting-to-the-Database","type":2,"isActive":false},{"label":"Inserting the Data","href":"Inserting-the-Data","type":2,"isActive":false},{"label":"Performing the Search","href":"Performing-the-Search","type":2,"isActive":false}]}