Skip to content

Commit

Permalink
Add: ユーザー辞書機能で対応する品詞の種類を拡充
Browse files Browse the repository at this point in the history
「固有名詞」の中のどういうカテゴリなのかも含めて辞書登録できるようにする
  • Loading branch information
tsukumijima committed Jan 6, 2025
1 parent cd110d6 commit e2d1053
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 8 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion voicevox_engine/app/routers/user_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def add_user_dict_word(
word_type: Annotated[
WordTypes | SkipJsonSchema[None],
Query(
description="PROPER_NOUN(固有名詞)、COMMON_NOUN(普通名詞)、VERB(動詞)、ADJECTIVE(形容詞)、SUFFIX(語尾)のいずれか"
description="PROPER_NOUN(固有名詞)、LOCATION_NAME(地名)、ORGANIZATION_NAME(組織・施設名)、PERSON_NAME(人名)、PERSON_FAMILY_NAME(姓)、PERSON_GIVEN_NAME(名)、COMMON_NOUN(普通名詞)、VERB(動詞)、ADJECTIVE(形容詞)、SUFFIX(語尾)のいずれか"
),
] = None,
priority: Annotated[
Expand Down
15 changes: 10 additions & 5 deletions voicevox_engine/user_dict/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,16 @@
# Part-of-speech categories under which a user-dictionary word can be registered.
# Every member's value is the same string as its name.
# NOTE(review): the class docstring is left in Japanese because it is visible at
# runtime via __doc__ and may be surfaced in the generated API schema — confirm
# before translating it.
class WordTypes(str, Enum):
"""品詞"""

PROPER_NOUN = "PROPER_NOUN"
COMMON_NOUN = "COMMON_NOUN"
VERB = "VERB"
ADJECTIVE = "ADJECTIVE"
SUFFIX = "SUFFIX"
PROPER_NOUN = "PROPER_NOUN" # proper noun (no finer category)
LOCATION_NAME = "LOCATION_NAME" # proper noun: place name
ORGANIZATION_NAME = "ORGANIZATION_NAME" # proper noun: organization / facility name
PERSON_NAME = "PERSON_NAME" # proper noun: person name (general)
PERSON_FAMILY_NAME = "PERSON_FAMILY_NAME" # proper noun: family name
PERSON_GIVEN_NAME = "PERSON_GIVEN_NAME" # proper noun: given name
COMMON_NOUN = "COMMON_NOUN" # common noun
VERB = "VERB" # verb
ADJECTIVE = "ADJECTIVE" # adjective
SUFFIX = "SUFFIX" # suffix


USER_DICT_MIN_PRIORITY = 0
Expand Down
50 changes: 50 additions & 0 deletions voicevox_engine/user_dict/user_dict_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ class _PartOfSpeechDetail:


# Word-cost candidate tables, one list per part-of-speech category.
# Every table holds 11 integers in ascending order. The proper-noun family
# tables are supplied as `cost_candidates` to the corresponding
# `_PartOfSpeechDetail` entries; the remaining tables are presumably wired in
# the same way — confirm against the full mapping.
# NOTE(review): the 11 slots presumably correspond to the user-dictionary
# priority levels — verify against the priority-to-cost conversion code.
# The trailing `# fmt: skip # noqa` directives keep the formatter and linter
# from touching the over-long lines; leave them in place.
_costs_proper_noun = [-988, 3488, 4768, 6048, 7328, 8609, 8734, 8859, 8984, 9110, 14176]
_costs_location_name = [147, 4350, 5403, 6456, 7509, 8562, 8778, 8994, 9210, 9427, 13960] # fmt: skip # noqa
_costs_organization_name = [1691, 4087, 4808, 5530, 6251, 6973, 7420, 7867, 8314, 8762, 13887] # fmt: skip # noqa
_costs_person_name = [515, 516, 2253, 3990, 5727, 7464, 8098, 8732, 9366, 10000, 10001]
_costs_person_family_name = [1991, 4126, 4974, 5823, 6672, 7521, 8001, 8481, 8961, 9442, 12808] # fmt: skip # noqa
_costs_person_given_name = [2209, 3772, 4905, 6038, 7171, 8304, 8728, 9152, 9576, 10000, 13842] # fmt: skip # noqa
_costs_common_noun = [-4445, 49, 1473, 2897, 4321, 5746, 6554, 7362, 8170, 8979, 15001]
_costs_verb = [3100, 6160, 6360, 6561, 6761, 6962, 7414, 7866, 8318, 8771, 13433]
_costs_adjective = [1527, 3266, 3561, 3857, 4153, 4449, 5149, 5849, 6549, 7250, 10001]
Expand All @@ -42,6 +47,51 @@ class _PartOfSpeechDetail:
cost_candidates=_costs_proper_noun,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.LOCATION_NAME: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="固有名詞",
part_of_speech_detail_2="地域",
part_of_speech_detail_3="一般",
context_id=1353,
cost_candidates=_costs_location_name,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.ORGANIZATION_NAME: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="固有名詞",
part_of_speech_detail_2="組織",
part_of_speech_detail_3="*",
context_id=1352,
cost_candidates=_costs_organization_name,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.PERSON_NAME: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="固有名詞",
part_of_speech_detail_2="人名",
part_of_speech_detail_3="一般",
context_id=1349,
cost_candidates=_costs_person_name,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.PERSON_FAMILY_NAME: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="固有名詞",
part_of_speech_detail_2="人名",
part_of_speech_detail_3="姓",
context_id=1350,
cost_candidates=_costs_person_family_name,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.PERSON_GIVEN_NAME: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="固有名詞",
part_of_speech_detail_2="人名",
part_of_speech_detail_3="名",
context_id=1351,
cost_candidates=_costs_person_given_name,
accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"],
),
WordTypes.COMMON_NOUN: _PartOfSpeechDetail(
part_of_speech="名詞",
part_of_speech_detail_1="一般",
Expand Down

0 comments on commit e2d1053

Please sign in to comment.