# -*- coding: utf-8 -*-
"""
Created on Thu Mar 12 00:01:25 2020
@author: Rahul
"""
#import libraries
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings
warnings.filterwarnings('ignore')
nltk.download('punkt', quiet=True) # Download the punkt package
nltk.download('wordnet', quiet=True) # Download the wordnet package
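# punkt provides the sentence/word tokenizers used below; wordnet is used by the WordNet lemmatizer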
#Get the article from its URL
article = Article('https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521')
article.download() #Download the article
article.parse() #Parse the article
article.nlp() #Apply Natural Language Processing (NLP)
corpus = article.text #Store the article text into corpus
print(corpus)
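# Note: article.text is filled by parse(); nlp() additionally fills article.summary and article.keywords, which are not used below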
#Tokenization
text = corpus
sent_tokens = nltk.sent_tokenize(text) #Split the text into a list of sentences
#Print the list of sentences
print(sent_tokens)
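# sent_tokens is the retrieval corpus: response() below searches this list for the sentence most similar to the user's query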
# Keyword Matching
#Greeting input from the user
GREETING_INPUTS = ["hi", "hello", "hola", "greetings", "wassup","hey"]
#Greeting responses back to the user
GREETING_RESPONSES = ["howdy","hi", "hey", "what's good", "hello","hey there"]
#Function to return a random greeting response to a user's greeting
def greeting(sentence):
    #If the user's input contains a greeting word, return a randomly chosen greeting response
    for word in sentence.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)
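# Example: greeting("hello doctor") returns a random entry from GREETING_RESPONSES, while greeting("what causes kidney disease?") returns None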
#Create a dictionary (key:value pair) to remove punctuation
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
#Lemmatizer (uses the wordnet data downloaded above)
lemmer = nltk.stem.WordNetLemmatizer()
def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]
#Create a function to return a list of lemmatized lower case words after removing punctuation
def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))
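# LemNormalize is passed to TfidfVectorizer as its tokenizer below, so each sentence (and the user's query)
# is lower-cased, stripped of punctuation, tokenized and lemmatized before TF-IDF weighting;
# e.g. LemNormalize("The kidneys filter wastes.") gives roughly ['the', 'kidney', 'filter', 'waste']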
# Generating response
def response(user_response):
    robo_response = '' #Create an empty response for the bot
    sent_tokens.append(user_response) #Append the user's query to the list of sentence tokens
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens) #TF-IDF matrix over all sentences plus the query
    vals = cosine_similarity(tfidf[-1], tfidf) #Similarity of the query (last row) to every sentence
    idx = vals.argsort()[0][-2] #Index of the most similar article sentence (the query itself ranks highest)
    flat = vals.flatten()
    flat.sort()
    score = flat[-2] #Second-highest similarity score
    if score == 0:
        robo_response = robo_response + "I apologize, I don't understand."
    else:
        robo_response = robo_response + sent_tokens[idx]
    sent_tokens.remove(user_response) #Remove the query from the sentence tokens again
    return robo_response
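# How the retrieval works: the user's query is appended as the last sentence token, so tfidf[-1] is the
# query's TF-IDF vector; cosine_similarity compares it against every sentence (including the query itself,
# which always ranks highest), so argsort()[0][-2] and flat[-2] pick the best-matching article sentence
# and its score; a score of 0 means the query shares no vocabulary with the article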
flag = True
print("DOCBot: I am DOCTOR BOT or DOCBot for short. I will answer your queries about Chronic Kidney Disease. If you want to exit, type Bye!")
while flag == True:
    user_response = input()
    user_response = user_response.lower()
    if user_response != 'bye':
        if user_response == 'thanks' or user_response == 'thank you':
            flag = False
            print("DOCBot: You're welcome !")
        else:
            if greeting(user_response) != None:
                print("DOCBot: " + greeting(user_response))
            else:
                print("DOCBot: " + response(user_response))
    else:
        flag = False
        print("DOCBot: Chat with you later !")