diff --git a/application/app.py b/application/app.py deleted file mode 100644 index ae389b0..0000000 --- a/application/app.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -from modules.DialogueManagement.manager import DialogueManager -from modules.LanguageGeneration.generator import LanguageGenerator -from modules.LanguageUnderstanding.language_understanding import LanguageUnderstanding - - -if __name__ == '__main__': - generator = LanguageGenerator() - language_understanding = LanguageUnderstanding() - manager = DialogueManager() - - print('S: 料理のジャンルや場所をおっしゃってください。') - while True: - # Input from User - sent = input('U: ') - if sent == 'ありがとう': - print('S: どういたしまして') - break - - # Language Understanding - dialogue_act = language_understanding.execute(sent) - - # Update Dialogue state - manager.update_dialogue_state(dialogue_act) - sys_act_type = manager.select_action(dialogue_act) - - # Generate Sentence - sent = generator.generate_sentence(sys_act_type) - print(sent) \ No newline at end of file diff --git a/application/console.py b/application/console.py new file mode 100644 index 0000000..140b844 --- /dev/null +++ b/application/console.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +from dialogue_system.bot import Bot + + +if __name__ == '__main__': + bot = Bot() + + print('S: 料理のジャンルや場所をおっしゃってください。') + while True: + sent = input('U: ') + if sent == 'ありがとう': + print('S: どういたしまして') + break + + reply = bot.reply(sent) + print('S: {0}'.format(reply)) diff --git a/modules/LanguageUnderstanding/utils/__init__.py b/application/plugins/__init__.py similarity index 100% rename from modules/LanguageUnderstanding/utils/__init__.py rename to application/plugins/__init__.py diff --git a/application/plugins/slack.py b/application/plugins/slack.py new file mode 100644 index 0000000..fec1155 --- /dev/null +++ b/application/plugins/slack.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +from slackbot.bot import respond_to + +from dialogue_system.bot import Bot + +bots = {} + + +def create_or_read(user_id): + return bots[user_id] if user_id in bots else Bot() + + +def save_bot(bot, user_id): + bots[user_id] = bot + + +@respond_to('(.*)') +def food(message, something): + body = message.body + text, ts, user_id = body['text'], body['ts'], body['user'] + bot = create_or_read(user_id) + reply_message = bot.reply(text) + save_bot(bot, user_id) + message.reply(reply_message) + diff --git a/application/server.py b/application/server.py new file mode 100644 index 0000000..dfa497d --- /dev/null +++ b/application/server.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +import tornado.ioloop +import tornado.web +import tornado.websocket + +from dialogue_system.bot import Bot + + +class MessageServer(tornado.websocket.WebSocketHandler): + bots = {} + + def check_origin(self, origin): + return True + + def open(self): + print('on open') + self.bots[self] = Bot() + self.write_message('料理のジャンルや場所をおっしゃってください。') + + def on_message(self, message): + print('on message') + print(message) + bot = self.bots[self] + self.write_message(bot.reply(message)) + + def on_close(self): + print('on close') + del self.bots[self] + +application = tornado.web.Application([(r'/ws', MessageServer)]) + +if __name__ == '__main__': + application.listen(8080) + tornado.ioloop.IOLoop.current().start() \ No newline at end of file diff --git a/application/slack_bot.py b/application/slack_bot.py new file mode 100644 index 0000000..aa5d04f --- /dev/null +++ b/application/slack_bot.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +from slackbot.bot import Bot + + +def main(): + bot = Bot() + bot.run() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/application/slackbot_settings.py b/application/slackbot_settings.py new file mode 100644 index 0000000..d21792a --- /dev/null +++ b/application/slackbot_settings.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +import os + + +API_TOKEN = os.environ.get('SLACK_API_KEY', '') + +default_reply = "スイマセン。其ノ言葉ワカリマセン" + +PLUGINS = [ + 'plugins', +] \ No newline at end of file diff --git a/modules/BackEnd/APIs/__init__.py b/dialogue_system/__init__.py similarity index 100% rename from modules/BackEnd/APIs/__init__.py rename to dialogue_system/__init__.py diff --git a/modules/BackEnd/__init__.py b/dialogue_system/backend/__init__.py similarity index 100% rename from modules/BackEnd/__init__.py rename to dialogue_system/backend/__init__.py diff --git a/modules/DialogueManagement/__init__.py b/dialogue_system/backend/apis/__init__.py similarity index 100% rename from modules/DialogueManagement/__init__.py rename to dialogue_system/backend/apis/__init__.py diff --git a/dialogue_system/backend/apis/docomo_dialogue.py b/dialogue_system/backend/apis/docomo_dialogue.py new file mode 100644 index 0000000..e9a4044 --- /dev/null +++ b/dialogue_system/backend/apis/docomo_dialogue.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +import os + +from doco.client import Client + + +class DocomoDialogAPI(object): + + def __init__(self, api_key=None): + api_key = os.environ.get('DOCOMO_DIALOGUE_API_KEY', api_key) + self.__client = Client(apikey=api_key) + + def reply(self, text): + response = self.__client.send(utt=text, apiname='Dialogue') + utt = response['utt'] + + return utt diff --git a/modules/BackEnd/APIs/hotpepper.py b/dialogue_system/backend/apis/hotpepper.py similarity index 99% rename from modules/BackEnd/APIs/hotpepper.py rename to dialogue_system/backend/apis/hotpepper.py index 5ba7c4b..99813e6 100644 --- a/modules/BackEnd/APIs/hotpepper.py +++ b/dialogue_system/backend/apis/hotpepper.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import os + import requests @@ -8,7 +9,6 @@ class AreaNotFoundException(BaseException): class HotPepperGourmetAPI(object): - BASE_URL = 'http://webservice.recruit.co.jp/hotpepper/{0}/v1/' def __init__(self, api_key=None): @@ -79,4 +79,4 @@ def search_food(self, **kwargs): if __name__ == '__main__': api = HotPepperGourmetAPI() - api.search_food() \ No newline at end of file + api.search_food() diff --git a/dialogue_system/bot.py b/dialogue_system/bot.py new file mode 100644 index 0000000..6c8e29b --- /dev/null +++ b/dialogue_system/bot.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +from dialogue_system.dialogue_management.manager import DialogueManager +from dialogue_system.language_generation.generator import LanguageGenerator +from dialogue_system.language_understanding.language_understanding import RuleBasedLanguageUnderstanding + + +class Bot(object): + + def __init__(self): + self.generator = LanguageGenerator() + self.language_understanding = RuleBasedLanguageUnderstanding() + self.manager = DialogueManager() + + def reply(self, sent): + dialogue_act = self.language_understanding.execute(sent) + + self.manager.update_dialogue_state(dialogue_act) + sys_act_type = self.manager.select_action(dialogue_act) + + sent = self.generator.generate_sentence(sys_act_type) + + return sent diff --git a/modules/LanguageGeneration/__init__.py b/dialogue_system/dialogue_management/__init__.py similarity index 100% rename from modules/LanguageGeneration/__init__.py rename to dialogue_system/dialogue_management/__init__.py diff --git a/modules/DialogueManagement/manager.py b/dialogue_system/dialogue_management/manager.py similarity index 63% rename from modules/DialogueManagement/manager.py rename to dialogue_system/dialogue_management/manager.py index 85b962f..a1e862c 100644 --- a/modules/DialogueManagement/manager.py +++ b/dialogue_system/dialogue_management/manager.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- -from modules.DialogueManagement.state import DialogueState -from modules.BackEnd.APIs.hotpepper import HotPepperGourmetAPI +from copy import deepcopy + +from dialogue_system.dialogue_management.state import DialogueState +from dialogue_system.backend.apis.hotpepper import HotPepperGourmetAPI +from dialogue_system.backend.apis.docomo_dialogue import DocomoDialogAPI class DialogueManager(object): @@ -12,9 +15,13 @@ def update_dialogue_state(self, dialogue_act): self.dialogue_state.update(dialogue_act) def select_action(self, dialogue_act): - from copy import deepcopy sys_act = deepcopy(dialogue_act) - if not self.dialogue_state.has('LOCATION'): + if dialogue_act['user_act_type'] == 'other': + api = DocomoDialogAPI() + reply = api.reply(dialogue_act['utt']) + sys_act['sys_act_type'] = 'CHAT' + sys_act['utt'] = reply + elif not self.dialogue_state.has('LOCATION'): sys_act['sys_act_type'] = 'REQUEST_LOCATION' elif not self.dialogue_state.has('GENRE'): sys_act['sys_act_type'] = 'REQUEST_GENRE' @@ -28,5 +35,6 @@ def select_action(self, dialogue_act): restaurant = api.search_restaurant(area=area, food=food,budget=budget) sys_act['sys_act_type'] = 'INFORM_RESTAURANT' sys_act['restaurant'] = restaurant + self.dialogue_state.clear() - return sys_act \ No newline at end of file + return sys_act diff --git a/modules/DialogueManagement/state.py b/dialogue_system/dialogue_management/state.py similarity index 88% rename from modules/DialogueManagement/state.py rename to dialogue_system/dialogue_management/state.py index b99d750..c302a9a 100644 --- a/modules/DialogueManagement/state.py +++ b/dialogue_system/dialogue_management/state.py @@ -12,7 +12,7 @@ def update(self, dialogue_act): self.__state['MAXIMUM_AMOUNT'] = dialogue_act.get('MAXIMUM_AMOUNT', self.__state['MAXIMUM_AMOUNT']) def has(self, name): - return self.__state[name] != None + return self.__state.get(name, None) != None def get_area(self): return self.__state['LOCATION'] @@ -23,6 +23,9 @@ def get_food(self): def get_budget(self): return self.__state['MAXIMUM_AMOUNT'] + def clear(self): + self.__init__() + def __str__(self): import pprint return pprint.pformat(self.__state) \ No newline at end of file diff --git a/dialogue_system/knowledge/__init__.py b/dialogue_system/knowledge/__init__.py new file mode 100644 index 0000000..336d1c1 --- /dev/null +++ b/dialogue_system/knowledge/__init__.py @@ -0,0 +1 @@ +__author__ = 'h-nakayama' diff --git a/dialogue_system/knowledge/genre.yaml b/dialogue_system/knowledge/genre.yaml new file mode 100644 index 0000000..fba4c12 --- /dev/null +++ b/dialogue_system/knowledge/genre.yaml @@ -0,0 +1,212 @@ +和食全般: + - 和食全般 + - 和食 +懐石料理: + - 懐石料理 + - 懐石 +割烹: + - 割烹 +郷土料理: + - 郷土料理 +焼き鳥・鶏料理: + - 焼き鳥 + - 鶏料理 +串焼き・串揚げ: + - 串焼き + - 串揚げ +鍋料理: + - 鍋料理 + - 鍋 +しゃぶしゃぶ: + - しゃぶしゃぶ +すき焼き: + - すき焼き +かに・えび・うに・海鮮料理: + - かに + - えび + - うに + - 海鮮料理 + - カニ + - 蟹 + - エビ + - 海老 + - ウニ +寿司: + - 寿司 +そば・うどん: + - そば + - うどん +とんかつ: + - とんかつ + - かつ +てんぷら: + - てんぷら + - 天婦羅 + - 天麩羅 +うなぎ: + - うなぎ +お好み焼き・もんじゃ: + - お好み焼き + - もんじゃ +炭火焼: + - 炭火焼 +炉ばた焼き・炙り焼き: + - 炉端焼き + - 炉ばた焼き + - あぶり焼き + - 炙り焼き +豆腐料理: + - 豆腐料理 + - 豆腐 +沖縄料理: + - 沖縄料理 + - 琉球料理 +和風創作料理: + - 和風創作料理 +洋食全般: + - 洋食 +フランス料理: + - フランス料理 + - フレンチ +イタリア料理: + - イタリア料理 + - イタリアン +スペイン料理: + - スペイン料理 +メキシコ・中南米料理: + - メキシコ料理 + - メキシカン + - 中南米料理 +その他各国料理: + - その他各国料理 +オムレツ・オムライス: + - オムレツ + - オムライス +洋風創作料理: + - 洋風創作料理 +ピザ: + - ピザ +ステーキ: + - ステーキ +カレー・ハヤシライス: + - カレー + - ハヤシライス +ハンバーグ: + - ハンバーグ +にんにく料理: + - ニンニク料理 + - にんにく料理 +パスタ: + - パスタ + - スパゲティ +中華料理: + - 中華料理 + - 中華 +飲茶・餃子・点心: + - 飲茶 + - 餃子 + - 点心 +ラーメン: + - ラーメン + - らーめん +韓国料理: + - 韓国料理 +焼肉・ホルモン・ジンギスカン: + - 焼肉 + - 焼き肉 + - ホルモン + - ジンギスカン +タイ料理: + - タイ料理 +ベトナム料理: + - ベトナム料理 +その他アジア料理: + - アジア料理 + - アジアン +無国籍・多国籍料理: + - 無国籍・多国籍料理 +ケーキ・デザート: + - ケーキ + - デザート +ハンバーガ・サンドイッチ・パン: + - ハンバーガ + - サンドイッチ + - パン +ふぐ・てっちり: + - フグ + - ふぐ + - 河豚 + - てっちり + - 鉄ちり + - ふぐ料理 +鉄板焼き: + - 鉄板焼き + - 鉄板焼 +おでん: + - おでん +丼もの: + - 丼もの + - 丼物 + - どんぶり物 + - どんぶり料理 +たこ焼き・焼きそば: + - たこ焼き + - 焼きそば + - たこやき + - タコヤキ + - 蛸焼 + - 蛸焼き + - タコ焼き + - 焼き蕎麦 + - 焼きソバ + - 焼蕎麦 + - 固焼きそば + - ソース焼きそば + - やきそば + - ヤキソバ +地中海料理: + - 地中海料理 +シーフード: + - シーフード + - 魚介 + - 魚貝 + - 魚介類 + - 魚貝類 +チーズフォンデュ: + - チーズフォンデュ + - チーズ +シチュー・スープ: + - シチュー + - スープ + - 汁物 + - 吸い物 + - 吸物 +バーベキュー: + - バーベキュー + - BBQ +広東料理: + - 広東料理 +北京料理: + - 北京料理 +四川料理: + - 四川料理 +上海料理: + - 上海料理 +台湾料理: + - 台湾料理 +ビビンバ・冷麺・チゲ: + - ビビンバ + - 冷麺 + - チゲ + - 石焼ビビンバ +インド料理: + - インド料理 + - 印度料理 + - インドの料理 +チャーハン: + - チャーハン + - 炒り飯 + - 焼きめし + - 焼飯 + - 焼き飯 + - 炒飯 \ No newline at end of file diff --git a/dialogue_system/knowledge/locations.txt b/dialogue_system/knowledge/locations.txt new file mode 100644 index 0000000..c39f87b --- /dev/null +++ b/dialogue_system/knowledge/locations.txt @@ -0,0 +1,493 @@ +銀座 +東銀座 +有楽町 +新橋 +汐留 +虎ノ門 +築地 +月島 +日比谷 +新富町 +勝どき +晴海 +豊洲 +霞が関 +神谷町 +御成門 +内幸町 +神楽坂 +飯田橋 +水道橋 +後楽園 +春日 +千駄木 +白山 +本郷三丁目 +茗荷谷 +小石川 +江戸川橋 +護国寺 +有明 +台場 +青海 +お台場海浜公園 +船の科学館 +国際展示場 +丸の内 +日本橋 +八重洲 +大手町 +三越前 +新日本橋 +人形町 +水天宮前 +京橋・宝町 +八丁堀 +茅場町 +小伝馬町 +馬喰町 +浜町 +東日本橋 +馬喰横山 +四谷三丁目 +麹町 +市ヶ谷 +九段下 +四ツ谷 +半蔵門 +信濃町 +曙橋 +牛込柳町 +若松河田 +上野 +鶯谷 +浅草 +上野広小路 +御徒町 +湯島 +末広町 +蔵前 +田原町 +入谷 +三ノ輪 +本所吾妻橋 +日暮里 +葛飾区 +北千住 +綾瀬 +北綾瀬 +亀有 +金町 +西日暮里 +谷中 +町屋 +三河島 +南千住 +青砥 +高砂 +柴又 +立石 +お花茶屋 +堀切菖蒲園 +西新井 +竹ノ塚 +五反野 +梅島 +西新井大師 +高野 +舎人公園 +荒川区 +足立区 +田端 +錦糸町 +浅草橋 +小岩 +亀戸 +両国 +新小岩 +菊川 +平井 +瑞江 +篠崎 +一之江 +船堀 +押上 +東京スカイツリー +曳舟 +向島 +東向島 +墨田区 +江戸川区 +西葛西 +門前仲町 +東陽町 +木場 +南砂町 +葛西 +新木場 +葛西臨海公園 +清澄白河 +森下 +住吉・猿江 +西大島 +大島 +江東区 +神田 +秋葉原 +御茶ノ水 +神保町 +根津 +竹橋 +淡路町 +小川町 +岩本町 +品川 +田町 +浜松町 +五反田 +目黒 +三田 +芝浦 +大門 +芝公園 +東京タワー +大崎 +白金高輪 +高輪台 +白金 +白金台 +泉岳寺 +竹芝 +日の出 +天王洲アイル +品川シーサイド +北品川 +新馬場 +青物横丁 +品川区 +港区 +矢口渡 +下丸子 +蒲田 +大森 +大田区 +大森海岸 +平和島 +蓮沼 +武蔵新田 +鵜の木 +沼部 +羽田空港 +羽田 +大鳥居 +糀谷 +雑色 +道玄坂 +宮益坂 +渋谷センター街 +渋谷マークシティ +渋谷公園通り +渋谷東口 +桜丘 +神泉 +宇田川町 +渋谷文化村通り +渋谷新南口 +原宿 +青山 +表参道 +外苑前 +恵比寿 +広尾 +中目黒 +恵比寿ガーデンプレイス +代官山 +赤坂・赤坂見附 +六本木 +西麻布 +六本木一丁目 +乃木坂 +溜池山王 +永田町 +麻布十番 +南麻布 +赤羽橋 +ミッドタウン +六本木ヒルズ +自由が丘 +田園調布 +尾山台 +大岡山 +多摩川 +緑が丘 +奥沢 +九品仏 +東池袋 +池袋東口 +南池袋 +目白 +雑司が谷 +池袋西口 +赤羽 +王子 +十条 +東十条 +北区他 +歌舞伎町 +代々木 +新宿西口 +新宿東口 +新宿南口 +西新宿 +都庁 +西武新宿 +新宿三丁目 +新宿御苑 +千駄ヶ谷 +北参道 +参宮橋 +代々木公園 +代々木八幡 +大久保 +東新宿 +新大久保 +巣鴨 +大塚 +西巣鴨 +駒込 +新大塚 +千石 +要町 +椎名町 +千川 +中野 +高円寺 +阿佐ヶ谷 +新中野 +中野坂上 +方南町 +南阿佐ヶ谷 +東中野 +東高円寺 +新高円寺 +中野新橋 +中野富士見町 +下北沢 +代々木上原 +駒場東大前 +東北沢 +池ノ上 +新代田 +世田谷代田 +高田馬場駅 +早稲田 +西早稲田 +落合 +下落合 +三軒茶屋 +池尻大橋 +駒沢大学 +世田谷 +豪徳寺 +松陰神社前 +三宿 +若林 +上町 +宮の坂 +松原 +二子玉川 +用賀 +等々力 +上野毛 +桜新町 +祐天寺 +学芸大学 +都立大学 +笹塚 +明大前 +浜田山 +久我山 +永福町 +初台 +幡ヶ谷 +代田橋 +下高井戸 +西永福 +高井戸 +富士見ヶ丘 +東松原 +調布 +仙川 +府中 +よみうりランド +府中本町 +東府中 +飛田給 +千歳烏山 +芦花公園 +桜上水 +上北沢 +八幡山 +柴崎 +国領 +布田 +つつじヶ丘 +経堂 +千歳船橋 +梅ヶ丘 +成城学園前 +狛江 +祖師ヶ谷大蔵 +喜多見 +和泉多摩川 +戸越銀座 +大井町 +旗の台 +荏原中延 +戸越 +馬込 +西馬込 +西大井 +下神明 +戸越公園 +中延 +荏原町 +北千束 +武蔵小山 +不動前 +西小山 +洗足 +洗足池 +池上 +千鳥町 +久が原 +御嶽山 +雪が谷大塚 +石川台 +長原 +武蔵境 +東小金井 +武蔵小金井駅 +国分寺 +国立 +西国分寺 +国分寺市 +一橋学園 +小平市 +玉川上水 +昭島 +拝島 +福生 +羽村 +小作 +秋川 +羽村市 +あきる野市 +東大和 +小川 +青梅 +河辺 +奥多摩 +若葉台 +多摩センター +京王堀之内 +稲城 +京王多摩川 +京王永山 +唐木田 +南大沢 +多摩市 +吉祥寺 +荻窪 +三鷹 +井の頭公園 +西荻窪 +三鷹台 +町田駅 +鶴川 +玉川学園前 +成瀬 +多摩境 +町田市 +川崎市麻生区 +八王子 +日野 +立川 +豊田 +高尾山 +西八王子 +山田 +八王子みなみ野 +日野市 +八王子市 +立川市 +石神井公園 +大泉学園 +清瀬 +保谷 +ひばりヶ丘 +東久留米 +秋津 +中井 +野方 +下井草 +上石神井 +田無 +東村山 +久米川 +小平 +花小金井 +西武柳沢 +東伏見 +武蔵関 +上井草 +井荻 +鷺ノ宮 +都立家政 +沼袋 +新井薬師前 +板橋 +江古田 +練馬 +大山 +上板橋 +成増 +光が丘 +練馬春日町 +豊島園 +板橋区 +小竹向原 +氷川台 +平和台 +練馬区 +下赤塚 +東武練馬 +ときわ台 +中板橋 +下板橋 +東長崎 +桜台 +中村橋 +富士見台 +練馬高野台 +板橋本町 +志村坂上 +高島平 +西高島平 +新高島平 +西台 +蓮根 +志村三丁目 +本蓮沼 +板橋区役所前 +新板橋 +北野駅 +高幡不動 +聖蹟桜ヶ丘 +百草園 +中河原 +分倍河原 +南平 +平山城址公園 +長沼駅 +東京都 \ No newline at end of file diff --git a/dialogue_system/knowledge/reader.py b/dialogue_system/knowledge/reader.py new file mode 100644 index 0000000..9435d16 --- /dev/null +++ b/dialogue_system/knowledge/reader.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +import os +import yaml + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def read_locations(): + file_path = os.path.join(BASE_DIR, 'locations.txt') + with open(file_path, 'rb') as f: + locations = [loc.decode('utf-8').strip() for loc in f] + + return locations + + +def read_genres(): + file_path = os.path.join(BASE_DIR, 'genre.yaml') + with open(file_path, 'rb') as f: + genres = yaml.load(f) + + return genres \ No newline at end of file diff --git a/modules/LanguageUnderstanding/DialogueActType/__init__.py b/dialogue_system/language_generation/__init__.py similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/__init__.py rename to dialogue_system/language_generation/__init__.py diff --git a/modules/LanguageGeneration/generator.py b/dialogue_system/language_generation/generator.py similarity index 78% rename from modules/LanguageGeneration/generator.py rename to dialogue_system/language_generation/generator.py index c32595c..b64c145 100644 --- a/modules/LanguageGeneration/generator.py +++ b/dialogue_system/language_generation/generator.py @@ -8,7 +8,7 @@ def __init__(self): pass def generate_sentence(self, dialogue_act): - sent = 'S: ' + sent = '' if 'LOCATION' in dialogue_act: sent += '場所は{0}ですね。'.format(dialogue_act['LOCATION']) if 'GENRE' in dialogue_act: @@ -23,11 +23,16 @@ def generate_sentence(self, dialogue_act): sent += '料理のジャンルを教えてください。' elif sys_act_type == 'REQUEST_BUDGET': sent += '予算の上限はどのくらいですか?' + elif sys_act_type == 'CHAT': + sent += dialogue_act['utt'] elif sys_act_type == 'INFORM_RESTAURANT': restaurant = dialogue_act['restaurant'] if restaurant: name, address, access = restaurant['name'], restaurant['address'], restaurant['access'] - sent += 'では、{0}がおすすめです。\n場所は{1}で{2}です。'.format(name, address, access) + lat, lng = restaurant['lat'], restaurant['lng'] + sent += 'では、{0}がおすすめです。\n場所は{1}で{2}です。\n'.format(name, address, access) + url = 'https://maps.googleapis.com/maps/api/staticmap?center={0},{1}&size=640x480&zoom=14&markers={0},{1}' + sent += url.format(lat, lng) else: sent += '申し訳ありません。条件に一致するお店が見つかりませんでした。' else: diff --git a/modules/LanguageUnderstanding/NamedEntityExtraction/__init__.py b/dialogue_system/language_understanding/__init__.py similarity index 100% rename from modules/LanguageUnderstanding/NamedEntityExtraction/__init__.py rename to dialogue_system/language_understanding/__init__.py diff --git a/dialogue_system/language_understanding/attribute_extraction/__init__.py b/dialogue_system/language_understanding/attribute_extraction/__init__.py new file mode 100644 index 0000000..40a96af --- /dev/null +++ b/dialogue_system/language_understanding/attribute_extraction/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/modules/LanguageUnderstanding/NamedEntityExtraction/extractor.py b/dialogue_system/language_understanding/attribute_extraction/ml_based_extractor.py similarity index 94% rename from modules/LanguageUnderstanding/NamedEntityExtraction/extractor.py rename to dialogue_system/language_understanding/attribute_extraction/ml_based_extractor.py index 30777d5..88c5089 100644 --- a/modules/LanguageUnderstanding/NamedEntityExtraction/extractor.py +++ b/dialogue_system/language_understanding/attribute_extraction/ml_based_extractor.py @@ -1,18 +1,19 @@ # -*- coding: utf-8 -*- import os from itertools import chain + +import yaml import pycrfsuite -import sklearn from sklearn.metrics import classification_report from sklearn.preprocessing import LabelBinarizer -class NamedEntityExtractor(object): +class MLBasedAttributeExtractor(object): def __init__(self, model_file='model.crfsuite'): self.__tagger = pycrfsuite.Tagger() try: - file_path = os.path.join(os.path.dirname(__file__), model_file) + file_path = os.path.join(os.path.dirname(__file__), 'model', model_file) self.__tagger.open(file_path) except FileNotFoundError: print('Learn') @@ -78,7 +79,7 @@ def evaluate(self, y_true, y_pred): import os import pickle import random - from modules.LanguageUnderstanding.utils.utils import sent2features, sent2labels + from dialogue_system.language_understanding.utils.utils import sent2features, sent2labels f = lambda path: os.path.dirname(path) root_dir = f(f(f(f(__file__)))) @@ -104,7 +105,7 @@ def evaluate(self, y_true, y_pred): test_x = [sent2features(s) for s in test_sents] test_y = [sent2labels(s) for s in test_sents] - extractor = NamedEntityExtractor() + extractor = MLBasedAttributeExtractor() extractor.train(train_x, train_y) pred_y = [extractor.tagger(xseq) for xseq in test_x] diff --git a/modules/LanguageUnderstanding/NamedEntityExtraction/model.crfsuite b/dialogue_system/language_understanding/attribute_extraction/model/model.crfsuite similarity index 100% rename from modules/LanguageUnderstanding/NamedEntityExtraction/model.crfsuite rename to dialogue_system/language_understanding/attribute_extraction/model/model.crfsuite diff --git a/dialogue_system/language_understanding/attribute_extraction/rule_based_extractor.py b/dialogue_system/language_understanding/attribute_extraction/rule_based_extractor.py new file mode 100644 index 0000000..594c802 --- /dev/null +++ b/dialogue_system/language_understanding/attribute_extraction/rule_based_extractor.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +import re + + +from dialogue_system.knowledge.reader import read_genres, read_locations +from dialogue_system.language_understanding.utils.utils import kansuji2arabic + + +class RuleBasedAttributeExtractor(object): + + def __init__(self): + self.__locations = read_locations() + self.__genres = read_genres() + + def extract(self, text): + attribute = {'LOCATION': self.__extract_location(text), 'GENRE': self.__extract_genre(text), + 'MAXIMUM_AMOUNT': self.__extract_budget(text)} + + return attribute + + def __extract_location(self, text): + locations = [loc for loc in self.__locations if loc in text] + locations.sort(key=len, reverse=True) + location = locations[0] if len(locations) > 0 else '' + + return location + + def __extract_genre(self, text): + for food_genre, foods in self.__genres.items(): + for food in foods: + if food in text: + return food_genre + return '' + + def __extract_budget(self, text): + pattern = r'\d+円|[一二三四五六七八九十壱弐参拾百千万萬億兆〇]+円' + match_obj = re.findall(pattern, text) + budget_str = match_obj[0][:-1] if len(match_obj) > 0 else '' + budget_int = kansuji2arabic(budget_str) + + return budget_int \ No newline at end of file diff --git a/dialogue_system/language_understanding/dialogue_act_type/__init__.py b/dialogue_system/language_understanding/dialogue_act_type/__init__.py new file mode 100644 index 0000000..40a96af --- /dev/null +++ b/dialogue_system/language_understanding/dialogue_act_type/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/modules/LanguageUnderstanding/DialogueActType/predictor.py b/dialogue_system/language_understanding/dialogue_act_type/ml_based_estimator.py similarity index 93% rename from modules/LanguageUnderstanding/DialogueActType/predictor.py rename to dialogue_system/language_understanding/dialogue_act_type/ml_based_estimator.py index 450e354..477d13b 100644 --- a/modules/LanguageUnderstanding/DialogueActType/predictor.py +++ b/dialogue_system/language_understanding/dialogue_act_type/ml_based_estimator.py @@ -2,15 +2,15 @@ import os from sklearn.externals import joblib from sklearn.ensemble import RandomForestClassifier -from gensim import corpora, matutils -from modules.LanguageUnderstanding.utils.utils import * +from dialogue_system.language_understanding.utils.utils import * -class DialogueActTypePredictor(object): + +class MLBasedDialogueActTypeEstimator(object): def __init__(self, file_name='model.pkl'): try: - file_path = os.path.join(os.path.dirname(__file__), file_name) + file_path = os.path.join(os.path.dirname(__file__), 'model', file_name) self.estimator = joblib.load(file_path) except FileNotFoundError: self.estimator = RandomForestClassifier() @@ -82,7 +82,7 @@ def sent2features_(sent): test_x = [to_features(dictionary, words) for words in get_words(test_sents)] test_y = labels[train_num:] - predictor = DialogueActTypePredictor() + predictor = MLBasedDialogueActTypeEstimator() predictor.train(train_x, train_y) predictor.evaluate(test_x, test_y) diff --git a/modules/LanguageUnderstanding/DialogueActType/dic.txt b/dialogue_system/language_understanding/dialogue_act_type/model/dic.txt similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/dic.txt rename to dialogue_system/language_understanding/dialogue_act_type/model/dic.txt diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_01.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_01.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_01.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_01.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_02.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_02.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_02.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_02.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_03.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_03.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_03.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_03.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_04.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_04.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_04.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_04.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_05.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_05.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_05.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_05.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_06.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_06.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_06.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_06.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_07.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_07.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_07.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_07.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_08.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_08.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_08.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_08.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_09.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_09.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_09.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_09.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_10.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_10.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_10.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_10.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_11.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_11.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_11.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_11.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_12.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_12.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_12.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_12.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_13.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_13.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_13.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_13.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_14.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_14.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_14.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_14.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_15.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_15.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_15.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_15.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_16.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_16.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_16.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_16.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_17.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_17.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_17.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_17.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_18.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_18.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_18.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_18.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_19.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_19.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_19.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_19.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_20.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_20.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_20.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_20.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_21.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_21.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_21.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_21.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_22.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_22.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_22.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_22.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_23.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_23.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_23.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_23.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_24.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_24.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_24.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_24.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_25.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_25.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_25.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_25.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_26.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_26.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_26.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_26.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_27.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_27.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_27.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_27.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_28.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_28.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_28.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_28.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_29.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_29.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_29.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_29.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_30.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_30.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_30.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_30.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_31.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_31.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_31.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_31.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_32.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_32.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_32.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_32.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_33.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_33.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_33.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_33.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_34.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_34.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_34.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_34.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_35.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_35.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_35.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_35.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_36.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_36.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_36.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_36.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_37.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_37.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_37.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_37.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_38.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_38.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_38.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_38.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_39.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_39.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_39.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_39.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_40.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_40.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_40.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_40.npy diff --git a/modules/LanguageUnderstanding/DialogueActType/model.pkl_41.npy b/dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_41.npy similarity index 100% rename from modules/LanguageUnderstanding/DialogueActType/model.pkl_41.npy rename to dialogue_system/language_understanding/dialogue_act_type/model/model.pkl_41.npy diff --git a/dialogue_system/language_understanding/dialogue_act_type/rule_based_estimator.py b/dialogue_system/language_understanding/dialogue_act_type/rule_based_estimator.py new file mode 100644 index 0000000..ac5a9ec --- /dev/null +++ b/dialogue_system/language_understanding/dialogue_act_type/rule_based_estimator.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- + + +class RuleBasedDialogueActTypeEstimator(object): + + def __init__(self): + pass + + def estimate(self, attribute): + if attribute['GENRE'] != '': + return 'genre' + elif attribute['LOCATION'] != '': + return 'location' + elif attribute['MAXIMUM_AMOUNT'] != '': + return 'maximum_amount' + else: + return 'other' diff --git a/dialogue_system/language_understanding/language_understanding.py b/dialogue_system/language_understanding/language_understanding.py new file mode 100644 index 0000000..41a4d52 --- /dev/null +++ b/dialogue_system/language_understanding/language_understanding.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +import copy + +from dialogue_system.language_understanding.attribute_extraction.rule_based_extractor import RuleBasedAttributeExtractor +from dialogue_system.language_understanding.dialogue_act_type.rule_based_estimator import RuleBasedDialogueActTypeEstimator + +from dialogue_system.language_understanding.attribute_extraction.ml_based_extractor import MLBasedAttributeExtractor +from dialogue_system.language_understanding.dialogue_act_type.ml_based_estimator import MLBasedDialogueActTypeEstimator, sent2features_ +from dialogue_system.language_understanding.utils.utils import sent2features +from training_data_generator.scripts.analyzer import analyze_morph + + +class RuleBasedLanguageUnderstanding(object): + + def __init__(self): + self.__estimator = RuleBasedDialogueActTypeEstimator() + self.__extractor = RuleBasedAttributeExtractor() + + def execute(self, sent): + attribute = self.__extractor.extract(sent) + act_type = self.__estimator.estimate(attribute) + + dialogue_act = {'user_act_type': act_type, 'utt': sent} + attribute_cp = copy.copy(attribute) + for k, v in attribute_cp.items(): + if v == '': + del attribute[k] + dialogue_act.update(attribute) + + return dialogue_act + + +class MLBasedLanguageUnderstanding(object): + + def __init__(self): + self.__predictor = MLBasedDialogueActTypeEstimator() + self.__extractor = MLBasedAttributeExtractor() + + def execute(self, sent): + features = sent2features_(sent) + act_type = self.__predictor.predict([features]) + + surfaces, features = analyze_morph(sent) + morphed_sent = [[surfaces[i]] + features[i].split(',') for i in range(len(surfaces))] + features = sent2features(morphed_sent) + named_entity = self.__extractor.extract(features, morphed_sent) + + dialogue_act = {'user_act_type': act_type, 'utt': sent} + dialogue_act.update(dict(named_entity)) + + return dialogue_act + + +if __name__ == '__main__': + sent = 'ラーメンを食べたい' + language_understanding = MLBasedLanguageUnderstanding() + language_understanding.execute(sent) + sent = '西新宿' + language_understanding.execute(sent) + sent = '新宿近辺' + language_understanding.execute(sent) \ No newline at end of file diff --git a/modules/BackEnd/APIs/docomo_dialogue.py b/dialogue_system/language_understanding/utils/__init__.py similarity index 100% rename from modules/BackEnd/APIs/docomo_dialogue.py rename to dialogue_system/language_understanding/utils/__init__.py diff --git a/modules/LanguageUnderstanding/utils/utils.py b/dialogue_system/language_understanding/utils/utils.py similarity index 70% rename from modules/LanguageUnderstanding/utils/utils.py rename to dialogue_system/language_understanding/utils/utils.py index a6d3978..12551eb 100644 --- a/modules/LanguageUnderstanding/utils/utils.py +++ b/dialogue_system/language_understanding/utils/utils.py @@ -1,3 +1,54 @@ +import re +from gensim import corpora, matutils + + +tt_ksuji = str.maketrans('一二三四五六七八九〇壱弐参', '1234567890123') + +re_suji = re.compile(r'[十拾百千万億兆\d]+') +re_kunit = re.compile(r'[十拾百千]|\d+') +re_manshin = re.compile(r'[万億兆]|[^万億兆]+') + +TRANSUNIT = {'十': 10, + '拾': 10, + '百': 100, + '千': 1000} +TRANSMANS = {'万': 10000, + '億': 100000000, + '兆': 1000000000000} + + +def kansuji2arabic(string, sep=False): + """漢数字をアラビア数字に変換""" + + def _transvalue(sj, re_obj=re_kunit, transdic=TRANSUNIT): + unit = 1 + result = 0 + for piece in reversed(re_obj.findall(sj)): + if piece in transdic: + if unit > 1: + result += unit + unit = transdic[piece] + else: + val = int(piece) if piece.isdecimal() else _transvalue(piece) + result += val * unit + unit = 1 + + if unit > 1: + result += unit + + return result + + transuji = string.translate(tt_ksuji) + for suji in sorted(set(re_suji.findall(transuji)), key=lambda s: len(s), + reverse=True): + if not suji.isdecimal(): + arabic = _transvalue(suji, re_manshin, TRANSMANS) + arabic = '{:,}'.format(arabic) if sep else str(arabic) + transuji = transuji.replace(suji, arabic) + + return transuji + + def is_hiragana(ch): return 0x3040 <= ord(ch) <= 0x309F @@ -133,7 +184,7 @@ def get_words(sents): return res -from gensim import corpora, matutils + def create_dictionary(words): dic = corpora.Dictionary(words) diff --git a/knowledge/ontology/genre.yaml b/knowledge/ontology/genre.yaml deleted file mode 100644 index fafdf00..0000000 --- a/knowledge/ontology/genre.yaml +++ /dev/null @@ -1,128 +0,0 @@ -和食全般: - 和食全般,和食: -懐石料理: - 懐石料理,懐石: -割烹: - 割烹: -郷土料理: - 郷土料理: -焼き鳥・鶏料理: - 焼き鳥,鶏料理: -串焼き・串揚げ: - 串焼き,串揚げ: -鍋料理: - 鍋料理,鍋: -しゃぶしゃぶ: - しゃぶしゃぶ: -すき焼き: - すき焼き: -かに・えび・うに・海鮮料理: - かに,えび,うに,海鮮料理,カニ,蟹,エビ,海老,ウニ: -寿司: - 寿司: -そば・うどん: - そば,うどん: -とんかつ: - とんかつ,かつ: -てんぷら: - てんぷら,天婦羅,天麩羅: -うなぎ: - うなぎ: -お好み焼き・もんじゃ: - お好み焼き,もんじゃ: -炭火焼: - 炭火焼: -炉ばた焼き・炙り焼き: - 炉端焼き,炉ばた焼き,あぶり焼き,炙り焼き: -豆腐料理: - 豆腐料理,豆腐: -沖縄料理: - 沖縄料理,琉球料理: -和風創作料理: - 和風創作料理: -洋食全般: - 洋食: -フランス料理: - フランス料理,フレンチ: -イタリア料理: - イタリア料理,イタリアン: -スペイン料理: - スペイン料理: -メキシコ・中南米料理: - メキシコ料理,メキシカン,中南米料理: -その他各国料理: - その他各国料理: -オムレツ・オムライス: - オムレツ,オムライス: -洋風創作料理: - 洋風創作料理: -ピザ: - ピザ: -ステーキ: - ステーキ: -カレー・ハヤシライス: - カレー,ハヤシライス: -ハンバーグ: - ハンバーグ: -にんにく料理: - ニンニク料理,にんにく料理: -パスタ: - パスタ,スパゲティ: -中華料理: - 中華料理,中華: -飲茶・餃子・点心: - 飲茶,餃子,点心: -ラーメン: - ラーメン,らーめん: -韓国料理: - 韓国料理: -焼肉・ホルモン・ジンギスカン: - 焼肉,焼き肉,ホルモン,ジンギスカン: -タイ料理: - タイ料理: -ベトナム料理: - ベトナム料理: -その他アジア料理: - アジア料理,アジアン: -無国籍・多国籍料理: - 無国籍・多国籍料理: -ケーキ・デザート: - ケーキ,デザート: -ハンバーガ・サンドイッチ・パン: - ハンバーガ,サンドイッチ,パン: -ふぐ・てっちり: - フグ,ふぐ,河豚,てっちり,鉄ちり,ふぐ料理: -鉄板焼き: - 鉄板焼き,鉄板焼: -おでん: - おでん: -丼もの: - 丼もの,丼物,どんぶり物,どんぶり料理: -たこ焼き・焼きそば: - たこ焼き,焼きそば,たこやき,タコヤキ,蛸焼,蛸焼き,タコ焼き,焼き蕎麦,焼きソバ,焼蕎麦,固焼きそば,ソース焼きそば,やきそば,ヤキソバ: -地中海料理: - 地中海料理: -シーフード: - シーフード,魚介,魚貝,魚介類,魚貝類: -チーズフォンデュ: - チーズフォンデュ,チーズ: -シチュー・スープ: - シチュー,スープ,汁物,吸い物,吸物: -バーベキュー: - バーベキュー,BBQ: -広東料理: - 広東料理: -北京料理: - 北京料理: -四川料理: - 四川料理: -上海料理: - 上海料理: -台湾料理: - 台湾料理: -ビビンバ・冷麺・チゲ: - ビビンバ,冷麺,チゲ,石焼ビビンバ: -インド料理: - インド料理,印度料理,インドの料理: -チャーハン: - チャーハン,炒り飯,焼きめし,焼飯,焼き飯,炒飯: \ No newline at end of file diff --git a/knowledge/reader.py b/knowledge/reader.py deleted file mode 100644 index a9e85a5..0000000 --- a/knowledge/reader.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import yaml - -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -file_path = os.path.join(BASE_DIR, 'ontology/genre.yaml') - -with open(file_path, 'r') as f: - data = yaml.load(f) \ No newline at end of file diff --git a/modules/LanguageUnderstanding/__init__.py b/modules/LanguageUnderstanding/__init__.py deleted file mode 100644 index 7c68785..0000000 --- a/modules/LanguageUnderstanding/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- \ No newline at end of file diff --git a/modules/LanguageUnderstanding/language_understanding.py b/modules/LanguageUnderstanding/language_understanding.py deleted file mode 100644 index bde6ad3..0000000 --- a/modules/LanguageUnderstanding/language_understanding.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -from modules.LanguageUnderstanding.DialogueActType.predictor import DialogueActTypePredictor, sent2features_ -from modules.LanguageUnderstanding.NamedEntityExtraction.extractor import NamedEntityExtractor -from modules.LanguageUnderstanding.utils.utils import sent2features -from training_data_generator.scripts.analyzer import analyze_morph - - -class LanguageUnderstanding(object): - - def __init__(self): - self.__predictor = DialogueActTypePredictor() - self.__extractor = NamedEntityExtractor() - - def execute(self, sent): - features = sent2features_(sent) - act_type = self.__predictor.predict([features]) - - surfaces, features = analyze_morph(sent) - morphed_sent = [[surfaces[i]] + features[i].split(',') for i in range(len(surfaces))] - features = sent2features(morphed_sent) - named_entity = self.__extractor.extract(features, morphed_sent) - - dialogue_act = {'user_act_type': act_type} - dialogue_act.update(dict(named_entity)) - - return dialogue_act - - -if __name__ == '__main__': - sent = 'ラーメンを食べたい' - language_understanding = LanguageUnderstanding() - language_understanding.execute(sent) - sent = '西新宿' - language_understanding.execute(sent) - sent = '新宿近辺' - language_understanding.execute(sent) \ No newline at end of file diff --git a/modules/__init__.py b/modules/__init__.py deleted file mode 100644 index 7c68785..0000000 --- a/modules/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- \ No newline at end of file diff --git a/tests/backend/docomo_dialog_test.py b/tests/backend/docomo_dialog_test.py new file mode 100644 index 0000000..9da144c --- /dev/null +++ b/tests/backend/docomo_dialog_test.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +import unittest + +from dialogue_system.backend.apis.docomo_dialogue import DocomoDialogAPI + + +class DocomoDialogAPITest(unittest.TestCase): + + def setUp(self): + self.api = DocomoDialogAPI() + + def tearDown(self): + pass + + def test_areaname_to_areacode(self): + reply = self.api.reply(text='こんにちは') + self.assertIsInstance(reply, str) \ No newline at end of file diff --git a/tests/backend/hotpepper_test.py b/tests/backend/hotpepper_test.py index 08440dc..021087a 100644 --- a/tests/backend/hotpepper_test.py +++ b/tests/backend/hotpepper_test.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import unittest -from modules.BackEnd.APIs.hotpepper import HotPepperGourmetAPI, AreaNotFoundException + +from dialogue_system.backend.apis.hotpepper import HotPepperGourmetAPI, AreaNotFoundException class HotPepperAPITest(unittest.TestCase): diff --git a/tests/language_understanding/__init__.py b/tests/language_understanding/__init__.py new file mode 100644 index 0000000..336d1c1 --- /dev/null +++ b/tests/language_understanding/__init__.py @@ -0,0 +1 @@ +__author__ = 'h-nakayama' diff --git a/tests/language_understanding/attribute_extraction/__init__.py b/tests/language_understanding/attribute_extraction/__init__.py new file mode 100644 index 0000000..336d1c1 --- /dev/null +++ b/tests/language_understanding/attribute_extraction/__init__.py @@ -0,0 +1 @@ +__author__ = 'h-nakayama' diff --git a/tests/language_understanding/attribute_extraction/extractor_test.py b/tests/language_understanding/attribute_extraction/extractor_test.py new file mode 100644 index 0000000..ddd3c63 --- /dev/null +++ b/tests/language_understanding/attribute_extraction/extractor_test.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +import unittest + +from dialogue_system.language_understanding.attribute_extraction.rule_based_extractor import RuleBasedAttributeExtractor + + +class AttributeExtractorTest(unittest.TestCase): + + def setUp(self): + self.extractor = RuleBasedAttributeExtractor() + + def tearDown(self): + pass + + def test_extract(self): + attribute = self.extractor.extract(text='ラーメンを食べたい') + self.assertEqual(attribute, {'LOCATION': '', 'GENRE': 'ラーメン', 'MAXIMUM_AMOUNT': ''}) + attribute = self.extractor.extract(text='西新宿のあたり') + self.assertEqual(attribute, {'LOCATION': '西新宿', 'GENRE': '', 'MAXIMUM_AMOUNT': ''}) + attribute = self.extractor.extract(text='1000円以下で') + self.assertEqual(attribute, {'LOCATION': '', 'GENRE': '', 'MAXIMUM_AMOUNT': '1000'}) \ No newline at end of file diff --git a/tests/language_understanding/dialogue_act_type/__init__.py b/tests/language_understanding/dialogue_act_type/__init__.py new file mode 100644 index 0000000..336d1c1 --- /dev/null +++ b/tests/language_understanding/dialogue_act_type/__init__.py @@ -0,0 +1 @@ +__author__ = 'h-nakayama' diff --git a/tests/language_understanding/dialogue_act_type/estimator.py b/tests/language_understanding/dialogue_act_type/estimator.py new file mode 100644 index 0000000..744d9de --- /dev/null +++ b/tests/language_understanding/dialogue_act_type/estimator.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +import unittest + +from dialogue_system.language_understanding.attribute_extraction.rule_based_extractor import RuleBasedAttributeExtractor +from dialogue_system.language_understanding.dialogue_act_type.rule_based_estimator import RuleBasedDialogueActTypeEstimator + + +class AttributeExtractorTest(unittest.TestCase): + + def setUp(self): + self.extractor = RuleBasedAttributeExtractor() + self.estimator = RuleBasedDialogueActTypeEstimator() + + def tearDown(self): + pass + + def test_extract(self): + attribute = self.extractor.extract(text='ラーメンを食べたい') + act_type = self.estimator.estimate(attribute) + self.assertEqual(act_type, 'genre') + attribute = self.extractor.extract(text='西新宿のあたり') + act_type = self.estimator.estimate(attribute) + self.assertEqual(act_type, 'location') + attribute = self.extractor.extract(text='1000円以下で') + act_type = self.estimator.estimate(attribute) + self.assertEqual(act_type, 'maximum_amount') + attribute = self.extractor.extract(text='こんにちは') + act_type = self.estimator.estimate(attribute) + self.assertEqual(act_type, 'other') \ No newline at end of file diff --git a/training_data_generator/scripts/genre_maker.py b/training_data_generator/scripts/genre_maker.py index c5264a6..ccc10d1 100644 --- a/training_data_generator/scripts/genre_maker.py +++ b/training_data_generator/scripts/genre_maker.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import os -from knowledge.reader import data +from dialogue_system.knowledge.reader import data if __name__ == '__main__': BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) diff --git a/training_data_generator/scripts/training_data_factory.py b/training_data_generator/scripts/training_data_factory.py index 009bed2..464c1c9 100644 --- a/training_data_generator/scripts/training_data_factory.py +++ b/training_data_generator/scripts/training_data_factory.py @@ -13,7 +13,7 @@ -file_list = ['genre.txt', 'location.txt', 'maximum_amount.txt'] +file_list = ['genre.txt', 'locations.txt', 'maximum_amount.txt'] template_strs = ['GENRE', 'LOCATION', 'MAXIMUM_AMOUNT'] for file_name, template_str in zip(file_list, template_strs):