Skip to content

Commit

Permalink
Add Unicode Normalization to speech and braille (#16521)
Browse files Browse the repository at this point in the history
Closes #16466

Summary of the issue:
Several speech synthesizers and braille tables are unable to speak or braille some characters, such as ligatures ("ĳ") or decomposed characters (Latin letters with a combining modifier that adds an acute accent, diaeresis, etc.). Also, italic or bold Unicode characters can't be spoken or brailled by default.

Description of user facing changes
None by default. If Unicode normalization is enabled for speech, speech output for objects and text navigation is normalized. For braille, normalization is applied to all braille output.

For speech, normalization is deliberately applied only to objects and text navigation: for individual character navigation or text selection, we really want to pass the original character to the synthesizer. If we don't, Unicode bold and italic characters are read as their normalized counterparts, which makes it impossible to distinguish them. This problem is less relevant when working with braille.

Description of development approach
Added UnicodeNormalizationOffsetConverter to textUtils with tests. It stores the normalized version of a given string and, based on diffing, calculates offset mappings between the original and normalized strings.
Braille output is processed using UnicodeNormalizationOffsetConverter when translating braille with normalization on. The resulting mappings (braille to raw position, raw to braille position) are then adjusted to account for normalization.
Added normalization to getTextInfoSpeech and getPropertiesSpeech.
  • Loading branch information
LeonarddeR authored May 21, 2024
1 parent bec58a4 commit 57ce236
Show file tree
Hide file tree
Showing 8 changed files with 340 additions and 28 deletions.
36 changes: 32 additions & 4 deletions source/braille.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import brailleViewer
from autoSettingsUtils.driverSetting import BooleanDriverSetting, NumericDriverSetting
from utils.security import objectBelowLockScreenAndWindowsIsLocked
from textUtils import isUnicodeNormalized, UnicodeNormalizationOffsetConverter
import hwIo
from editableText import EditableText

Expand Down Expand Up @@ -496,13 +497,40 @@ def update(self):
mode = louis.dotsIO
if config.conf["braille"]["expandAtCursor"] and self.cursorPos is not None:
mode |= louis.compbrlAtCursor
self.brailleCells, self.brailleToRawPos, self.rawToBraillePos, self.brailleCursorPos = louisHelper.translate(

converter: UnicodeNormalizationOffsetConverter | None = None
if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(self.rawText):
converter = UnicodeNormalizationOffsetConverter(self.rawText)
textToTranslate = converter.encoded
# Typeforms must be adapted to represent normalized characters.
textToTranslateTypeforms = [
self.rawTextTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets
]
# Convert the cursor position to a normalized offset.
cursorPos = converter.strToEncodedOffsets(self.cursorPos)
else:
textToTranslate = self.rawText
textToTranslateTypeforms = self.rawTextTypeforms
cursorPos = self.cursorPos

self.brailleCells, brailleToRawPos, rawToBraillePos, self.brailleCursorPos = louisHelper.translate(
[handler.table.fileName, "braille-patterns.cti"],
self.rawText,
typeform=self.rawTextTypeforms,
textToTranslate,
typeform=textToTranslateTypeforms,
mode=mode,
cursorPos=self.cursorPos
cursorPos=cursorPos
)

if converter:
# The received brailleToRawPos contains braille to normalized positions.
# Process them to represent real raw positions by converting them from normalized ones.
brailleToRawPos = [converter.encodedToStrOffsets(i) for i in brailleToRawPos]
# The received rawToBraillePos contains normalized to braille positions.
# Create a new list based on real raw positions.
rawToBraillePos = [rawToBraillePos[i] for i in converter.computedStrToEncodedOffsets]
self.brailleToRawPos = brailleToRawPos
self.rawToBraillePos = rawToBraillePos

if (
self.selectionStart is not None
and self.selectionEnd is not None
Expand Down
2 changes: 2 additions & 0 deletions source/config/configSpec.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
# symbolLevel: One of the characterProcessing.SymbolLevel values.
symbolLevel = integer(default=100)
trustVoiceLanguage = boolean(default=true)
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
includeCLDR = boolean(default=True)
beepSpeechModePitch = integer(default=10000,min=50,max=11025)
outputDevice = string(default=default)
Expand Down Expand Up @@ -82,6 +83,7 @@
optionsEnum="ReviewRoutingMovesSystemCaretFlag", behaviorOfDefault="NEVER")
readByParagraph = boolean(default=false)
wordWrap = boolean(default=true)
unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled")
focusContextPresentation = option("changedContext", "fill", "scroll", default="changedContext")
interruptSpeechWhileScrolling = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
showSelection = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled")
Expand Down
24 changes: 24 additions & 0 deletions source/gui/settingsDialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1589,6 +1589,17 @@ def makeSettings(self, settingsSizer):
self.bindHelpEvent("SpeechSettingsTrust", self.trustVoiceLanguageCheckbox)
self.trustVoiceLanguageCheckbox.SetValue(config.conf["speech"]["trustVoiceLanguage"])

self.unicodeNormalizationCombo: nvdaControls.FeatureFlagCombo = settingsSizerHelper.addLabeledControl(
labelText=_(
# Translators: This is a label for a combo-box in the Speech settings panel.
"Unicode normali&zation"
),
wxCtrlClass=nvdaControls.FeatureFlagCombo,
keyPath=["speech", "unicodeNormalization"],
conf=config.conf,
)
self.bindHelpEvent("SpeechUnicodeNormalization", self.unicodeNormalizationCombo)

includeCLDRText = _(
# Translators: This is the label for a checkbox in the
# voice settings panel (if checked, data from the unicode CLDR will be used
Expand Down Expand Up @@ -1701,6 +1712,7 @@ def onSave(self):
self.symbolLevelList.GetSelection()
].value
config.conf["speech"]["trustVoiceLanguage"] = self.trustVoiceLanguageCheckbox.IsChecked()
self.unicodeNormalizationCombo.saveCurrentValueToConf()
currentIncludeCLDR = config.conf["speech"]["includeCLDR"]
config.conf["speech"]["includeCLDR"] = newIncludeCldr = self.includeCLDRCheckbox.IsChecked()
if currentIncludeCLDR is not newIncludeCldr:
Expand Down Expand Up @@ -4145,6 +4157,17 @@ def makeSettings(self, settingsSizer):
self.bindHelpEvent("BrailleSettingsWordWrap", self.wordWrapCheckBox)
self.wordWrapCheckBox.Value = config.conf["braille"]["wordWrap"]

self.unicodeNormalizationCombo: nvdaControls.FeatureFlagCombo = sHelper.addLabeledControl(
labelText=_(
# Translators: This is a label for a combo-box in the Braille settings panel.
"Unicode normali&zation"
),
wxCtrlClass=nvdaControls.FeatureFlagCombo,
keyPath=["braille", "unicodeNormalization"],
conf=config.conf,
)
self.bindHelpEvent("BrailleUnicodeNormalization", self.unicodeNormalizationCombo)

self.brailleInterruptSpeechCombo: nvdaControls.FeatureFlagCombo = sHelper.addLabeledControl(
labelText=_(
# Translators: This is a label for a combo-box in the Braille settings panel.
Expand Down Expand Up @@ -4184,6 +4207,7 @@ def onSave(self):
self.brailleReviewRoutingMovesSystemCaretCombo.saveCurrentValueToConf()
config.conf["braille"]["readByParagraph"] = self.readByParagraphCheckBox.Value
config.conf["braille"]["wordWrap"] = self.wordWrapCheckBox.Value
self.unicodeNormalizationCombo.saveCurrentValueToConf()
config.conf["braille"]["focusContextPresentation"] = self.focusContextPresentationValues[self.focusContextPresentationList.GetSelection()]
self.brailleInterruptSpeechCombo.saveCurrentValueToConf()
self.brailleShowSelectionCombo.saveCurrentValueToConf()
Expand Down
11 changes: 9 additions & 2 deletions source/speech/speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import speechDictHandler
import characterProcessing
import languageHandler
from textUtils import unicodeNormalize
from . import manager
from .extensions import speechCanceled, pre_speechCanceled, pre_speech
from .extensions import filter_speechSequence, speechCanceled
Expand Down Expand Up @@ -1568,6 +1569,8 @@ def getTextInfoSpeech( # noqa: C901
# There was content after the indentation, so there is no more indentation.
indentationDone=True
if command:
if config.conf["speech"]["unicodeNormalization"]:
command = unicodeNormalize(command)
if inTextChunk:
relativeSpeechSequence[-1]+=command
else:
Expand Down Expand Up @@ -1775,7 +1778,7 @@ def getPropertiesSpeech( # noqa: C901
reason: OutputReason = OutputReason.QUERY,
**propertyValues
) -> SpeechSequence:
textList: List[str] = []
textList: SpeechSequence = []
name: Optional[str] = propertyValues.get('name')
if name:
textList.append(name)
Expand Down Expand Up @@ -1968,7 +1971,11 @@ def getPropertiesSpeech( # noqa: C901
errorMessage: str | None = propertyValues.get("errorMessage", None)
if errorMessage:
textList.append(errorMessage)

if config.conf["speech"]["unicodeNormalization"]:
textList = [
unicodeNormalize(t) if isinstance(t, str) else t
for t in textList
]
types.logBadSequenceTypes(textList)
return textList

Expand Down
Loading

0 comments on commit 57ce236

Please sign in to comment.