From f9cf6801acd5500b99c4d79bf4636e6110b4cdac Mon Sep 17 00:00:00 2001 From: Genarito Date: Sun, 1 Aug 2021 19:47:33 -0300 Subject: [PATCH] + Improved dumps method --- gura/GuraParser.py | 151 +++++++++++++++++++++++++-------------------- 1 file changed, 84 insertions(+), 67 deletions(-) diff --git a/gura/GuraParser.py b/gura/GuraParser.py index e71c5f6..4072218 100644 --- a/gura/GuraParser.py +++ b/gura/GuraParser.py @@ -1,5 +1,5 @@ import os -from typing import Dict, Any, Optional, List, Set, Tuple, cast +from typing import Dict, Any, Optional, List, Set, Tuple from gura.Parser import ParseError, Parser from enum import Enum, auto @@ -39,7 +39,7 @@ class InvalidIndentationError(Exception): # Acceptable chars for keys KEY_ACCEPTABLE_CHARS = '0-9A-Za-z_' -# Special characters to be escaped +# Special characters to be escaped when parsing values ESCAPE_SEQUENCES = { 'b': '\b', 'f': '\f', @@ -51,6 +51,21 @@ class InvalidIndentationError(Exception): '$': '$' } +# Sequences that need to be escaped when dumping string values +SEQUENCES_TO_ESCAPE = { + '\\': '\\\\', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', + '"': '\\"', + '$': '\\$', +} + +# Indentation of 4 spaces +INDENT = ' ' + class MatchResultType(Enum): USELESS_LINE = auto(), @@ -681,87 +696,88 @@ def literal_string(self) -> MatchResult: return MatchResult(MatchResultType.PRIMITIVE, ''.join(chars)) - def dumps(self, value, indentation_level: int, new_line: bool) -> str: + @staticmethod + def dumps(value: Dict) -> str: """ Generates a Gura string from a dictionary (aka. stringify). Takes a value, check its type and returns its correct value in a recursive way :param value: Value retrieved from dict to transform in string - :param indentation_level: Current indentation level to compute indentation in string - :param new_line: If True, it prints a new line at the end of some values. 
This prevents some issues when dumps an object or array :return: String representation of the received value """ - new_line_char = '\n' if new_line else '' if value is None: - return f'null{new_line_char}' + return 'null' + value_type = type(value) if value_type == str: - # Uses literal string to prevent errors with special chars. Also escapes single quotes - value = cast(str, value) - escaped = value.replace("'", "\\'") - return f"'{escaped}'{new_line_char}" - if value_type in (int, float): - return str(value) + new_line_char + result = '' + + # Escapes everything that needs to be escaped + for char in value: + result += SEQUENCES_TO_ESCAPE.get(char, char) + + return f'"{result}"' if value_type == bool: - return ('true' if value is True else 'false') + new_line_char + return 'true' if value is True else 'false' + if value_type in (int, float): + return str(value) if value_type == dict: - if len(value) > 0: - result = '' - indentation = ' ' * (indentation_level * 4) - for key, dict_value in value.items(): - result += f'{indentation}{key}:' - # If it is an object it does not add a whitespace after key - if type(dict_value) != dict: - result += ' ' + if len(value) == 0: + return 'empty' + result = '' + for key, dict_value in value.items(): + result += f'{key}:' + + # If the value is an object, splits the stringified value by + # newline and indents each line before adding it to the result + if type(dict_value) == dict: + stringified_value = dumps(dict_value).rstrip() + if len(dict_value) > 0: + result += '\n' + + for line in stringified_value.split('\n'): + result += INDENT + line + '\n' + else: + # Prevents indentation on empty objects + result += ' ' + stringified_value + '\n' + # Otherwise adds the stringified value + else: + result += f' {dumps(dict_value)}\n' - result += self.dumps(dict_value, indentation_level + 1, new_line=True) - return '\n' + result + return result + if value_type == list: + should_multiline = any((type(e) == dict or type(e) == list) and 
len(e) > 0 for e in value) - # Empty object - return ' empty\n' + if not should_multiline: + stringify_values = list(map(dumps, value)) + return f'[{", ".join(stringify_values)}]' - if value_type == list: - # Lists are a special case: if it has an object, and indented representation must be returned. In case - # of primitive values or nested arrays, a plain representation is more appropriated - list_values = [] - at_least_one_obj = False - for list_elem in value: - is_obj = type(list_elem) == dict - str_value = self.dumps(list_elem, indentation_level, new_line=is_obj) - - # Prevents multiples new lines - if is_obj: - str_value = str_value.rstrip('\n') - at_least_one_obj = True - - list_values.append(str_value) - list_str = '[' - - # If there is at least one object adds an indentation to every non object value - if at_least_one_obj: - list_str += '\n' - list_joined_str = '' - last_idx = len(list_values) - 1 - for idx, elem in enumerate(list_values): - elem_is_obj = elem.startswith('\n') - if not elem_is_obj: - elem = ' ' * 4 + elem - else: - elem = elem.lstrip('\n') - list_joined_str += elem - if idx != last_idx: - list_joined_str += ',\n' - else: - # In case of primitive or nested arrays, just returns a plain representation - list_joined_str = ', '.join(list_values) - list_str += list_joined_str + result = '[' + + last_idx = len(value) - 1 + for idx, entry in enumerate(value): + stringified_value = dumps(entry).rstrip() - # Adds a last new line to append closing bracket - if at_least_one_obj: - list_str += '\n' - return list_str + ']' + new_line_char + result += '\n' + + # If the stringified value contains multiple lines, indents all + # of them and adds them all to the result + if '\n' in stringified_value: + splitted = stringified_value.split('\n') + splitted = map(lambda element: INDENT + element, splitted) + result += '\n'.join(splitted) + else: + # Otherwise indent the value and add to result + result += INDENT + stringified_value + + # Add a comma if this 
entry is not the final entry in the list + if idx < last_idx: + result += ',' + + result += '\n]' + return result - return '' + raise TypeError() def loads(text: str) -> Dict: @@ -780,5 +796,6 @@ def dumps(data: Dict) -> str: :param data: Dictionary data to stringify :return: String with the data in Gura format """ - content = GuraParser().dumps(data, indentation_level=0, new_line=True) + # content = GuraParser().dumps(data, indentation_level=0, new_line=True) + content = GuraParser().dumps(data) return content.lstrip('\n').rstrip('\n')