From 96eb13870a80691b93329b31d8d6c986e7ed4435 Mon Sep 17 00:00:00 2001
From: Patrick_Cin <me@patrickphat.com>
Date: Tue, 23 Feb 2021 13:29:56 +0700
Subject: [PATCH] Edit readme (#26)

* refactor pkg name

* update README

* update year in readme

* remove ruby folder
---
 README.md                       |  18 ++--
 ruby/__init__.py                |   4 -
 ruby/translator.py              |  83 -------------------
 ruby/utils/__init__.py          |   1 -
 ruby/utils/misc.py              |   4 -
 ruby/utils/tree_manipulation.py | 141 --------------------------------
 6 files changed, 9 insertions(+), 242 deletions(-)
 delete mode 100644 ruby/__init__.py
 delete mode 100644 ruby/translator.py
 delete mode 100644 ruby/utils/__init__.py
 delete mode 100644 ruby/utils/misc.py
 delete mode 100644 ruby/utils/tree_manipulation.py
diff --git a/README.md b/README.md
index e5ec133..1c6961a 100644
--- a/README.md
+++ b/README.md
@@ -5,13 +5,13 @@
 
 *Why not translate it yourself when Google Translate cannot satisfy you❓*
 
-[![CircleCI](https://circleci.com/gh/urbans/urbans/tree/master.svg?style=svg)](https://circleci.com/gh/urbans/urbans/tree/master)
-[![Codacy Badge](https://app.codacy.com/project/badge/Grade/b4937f1f9fe0477b9fc557cbedf92b24)](https://www.codacy.com/gh/urbans/urbans?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=urbans/urbans&amp;utm_campaign=Badge_Grade)
-[![Codacy Badge](https://app.codacy.com/project/badge/Coverage/b4937f1f9fe0477b9fc557cbedf92b24)](https://www.codacy.com/gh/urbans/urbans?utm_source=github.com&utm_medium=referral&utm_content=urbans/urbans&utm_campaign=Badge_Coverage)
+[![CircleCI](https://circleci.com/gh/pyurbans/urbans/tree/master.svg?style=svg)](https://circleci.com/gh/pyurbans/urbans/tree/master)
+[![Codacy Badge](https://app.codacy.com/project/badge/Grade/b4937f1f9fe0477b9fc557cbedf92b24)](https://www.codacy.com/gh/pyurbans/urbans?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=pyurbans/urbans&amp;utm_campaign=Badge_Grade)
+[![Codacy Badge](https://app.codacy.com/project/badge/Coverage/b4937f1f9fe0477b9fc557cbedf92b24)](https://www.codacy.com/gh/pyurbans/urbans?utm_source=github.com&utm_medium=referral&utm_content=pyurbans/urbans&utm_campaign=Badge_Coverage)
 [![PyPI version](https://badge.fury.io/py/urbans.svg)](https://badge.fury.io/py/urbans)
-[![GitHub release](https://img.shields.io/github/release/urbans/urbans.svg)](https://GitHub.com/urbans/urbans/releases/)
-[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/urbans/urbans/graphs/commit-activity)
-[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/urbans/urbans/blob/master/LICENSE)
+[![GitHub release](https://img.shields.io/github/release/pyurbans/urbans.svg)](https://GitHub.com/pyurbans/urbans/releases/)
+[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/pyurbans/urbans/graphs/commit-activity)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/pyurbans/urbans/blob/master/LICENSE)
 
 </div>
 
@@ -68,7 +68,7 @@ trans_sentences = translator.translate(src_sentences)
 ```
 
 ## ⚖️ License
-This repository is using the Apache 2.0 license that is listed in the repo. Please take a look at [`LICENSE`](https://github.com/urbans/urbans/blob/master/LICENSE) as you wish.
+This repository is licensed under the Apache 2.0 license. See [`LICENSE`](https://github.com/pyurbans/urbans/blob/master/LICENSE) for the full text.
 
 ## ✍️ BibTeX
 If you wish to cite the framework feel free to use this (but only if you loved it 😊):
@@ -76,10 +76,10 @@ If you wish to cite the framework feel free to use this (but only if you loved i
 @misc{phat2020urbans,
   author = {Truong-Phat Nguyen},
   title = {URBANS: Universal Rule-Based Machine Translation NLP toolkit},
-  year = {2020},
+  year = {2021},
   publisher = {GitHub},
   journal = {GitHub repository},
-  howpublished = {\url{https://github.com/urbans/urbans}},
+  howpublished = {\url{https://github.com/pyurbans/urbans}},
 }
 ```
 
diff --git a/ruby/__init__.py b/ruby/__init__.py
deleted file mode 100644
index 28b1bb9..0000000
--- a/ruby/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-"""Import production-ready tools of RUBY."""
-from .translator import Translator
-
-__all__ = ['Translator',]
diff --git a/ruby/translator.py b/ruby/translator.py
deleted file mode 100644
index a7b4447..0000000
--- a/ruby/translator.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from typing import Dict, List
-from .utils.tree_manipulation import translate_trees_grammar
-from .utils.misc import remove_trailing_space
-import nltk
-from nltk.parse.chart import BottomUpLeftCornerChartParser as Parser
-
-class Translator:
-    """"""
-    def __init__(self,
-                 src_grammar: str,
-                 src_to_tgt_grammar: Dict,
-                 src_to_tgt_dictionary: Dict):
-        """
-        Initialize the translator
-        Args:
-            src_grammar (str): source language grammar written in nltk style
-            E.g: src_grammar = \"""
-                                S -> NP VP
-                                NP -> PRP
-                                VP -> VB PP
-                                PP -> PB NP
-                                NP -> CD NP1
-                                NP1 -> JJ NN
-                                PRP -> 'I'
-                                VB -> 'go'
-                                PB -> 'to'
-                                CD -> 'a'
-                                JJ -> 'good'
-                                NN -> 'school'
-                               \"""
-            src_to_tgt_grammar (Dict): Transition between source grammar and target grammar as a dict
-            E.g: src2target_grammar =  {
-                                    "NP1 -> JJ NN": "NP1 -> NN JJ"
-                                        }
-            src_to_tgt_dictionary (Dict): Dictionary of word-by-word transition from src language to target language
-            E.g: en_to_vi_dict = {
-                                "I":"tôi",
-                                "go":"đi",
-                                "to":"tới",
-                                "school":"ngôi_trường",
-                                ...
-                                 }
-        """
-        self.src_grammar = nltk.CFG.fromstring(self.__process_text_input(src_grammar))
-        self.parser = Parser(self.src_grammar)
-        self.src_to_tgt_grammar =  src_to_tgt_grammar
-        self.src_to_tgt_dictionary = src_to_tgt_dictionary
-
-    @staticmethod
-    def __process_text_input(txt):
-        return remove_trailing_space(txt)
-
-    def translate(self, sentences: List[str] or str, allow_multiple_translation = False):
-        """
-        Translate a list of sentences
-        Args:
-            sentences (List[str]): A list of str-typed sentences
-        Returns:
-            List[str]: A list of translated sentences
-        """
-        if isinstance(sentences,str):
-            sentences = [sentences]
-
-        translated_sentences = []
-        failed_sentences = []
-
-        for sentence in sentences:
-            sentence = self.__process_text_input(sentence)
-            trees = self.parser.parse(sentence.split())
-            list_trees = [tree for tree in trees]
-            if len(list_trees) == 0:
-                failed_sentences.append(sentence)
-                continue
-            trans_sentence = translate_trees_grammar(list_trees, self.src_to_tgt_grammar, self.src_to_tgt_dictionary)
-            translated_sentences.append(trans_sentence)
-
-        # String to display failed sentence
-        failed_sentences = '\n'.join(failed_sentences)
-
-        if len(failed_sentences) > 0:
-            raise ValueError(f"Please check your grammar again, failed to parse these sentences: \n{failed_sentences}")
-
-        return translated_sentences
\ No newline at end of file
diff --git a/ruby/utils/__init__.py b/ruby/utils/__init__.py
deleted file mode 100644
index 3e43ba2..0000000
--- a/ruby/utils/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Utilities for tree manipulation."""
\ No newline at end of file
diff --git a/ruby/utils/misc.py b/ruby/utils/misc.py
deleted file mode 100644
index dd8384c..0000000
--- a/ruby/utils/misc.py
+++ /dev/null
@@ -1,4 +0,0 @@
-import re
-
-def remove_trailing_space(sentence):
-    return re.sub(' +', ' ', sentence)
\ No newline at end of file
diff --git a/ruby/utils/tree_manipulation.py b/ruby/utils/tree_manipulation.py
deleted file mode 100644
index 5616a70..0000000
--- a/ruby/utils/tree_manipulation.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import nltk
-from nltk import ParentedTree as PTree
-from typing import List
-import random
-
-def tree_to_ptree(tree: nltk.Tree):
-    tree_str = tree.__str__()
-    ptree = PTree.fromstring(tree_str)
-    return ptree
-
-def swap_tree_given_left(left_tree: nltk.Tree, displacement: List[int], new_words= List[str]):
-    """Swap left node with right node within a parent node."""
-    nodes = [left_tree]
-    right_tree = left_tree.right_sibling()
-    parent_tree = left_tree.parent()
-    # Get all tree pointer
-    for disp in displacement:
-        # disp = -1 indicates that is a new word, skip
-        if disp == -1:
-            continue
-        nodes.append(right_tree)
-
-        right_tree = right_tree.right_sibling()
-        if right_tree == None:
-            break
-
-    # Remove all siblings and left-most self
-    for node in nodes:
-        parent_tree.remove(node)
-
-    # Append with new displacement
-    for disp in displacement:
-        # disp = -1 indicates that is a new word
-        if disp == -1:
-            new_word = PTree('NEW', [new_words.pop(0)])
-            parent_tree.append(new_word)
-        else:
-            parent_tree.append(nodes[disp])
-
-    return parent_tree
-    
-
-def build_grammar_str_from_left_most(tree: nltk.Tree):
-    
-    left_pt = tree.left_sibling()
-    right_pt = tree.right_sibling()
-    parent_pt = tree.parent()
-    
-    grammar_str = None
-
-    if parent_pt != None:
-        grammar_str = f"{parent_pt.label()} -> {tree.label()}"
-
-        # Build grammar from leftmost node in the subtree
-        if left_pt == None and right_pt != None :
-            while right_pt != None:
-                grammar_str += f" {right_pt.label()}"
-                right_pt = right_pt.right_sibling()
-    return grammar_str
-
-
-def translate_tree_grammar(tree: nltk.Tree, grammar_substitutions: dict):
-    """Translate tree grammar based on grammar substitution dict."""
-    # Number of substitution done
-    num_subs = 0
-    # Convert tree to ParentedTree
-    ptree = tree_to_ptree(tree)
-    old_num_subs = -1
-
-    # Loops until there no substitution left
-    while num_subs != old_num_subs:
-        old_num_subs = num_subs
-        # Traverse through subtrees
-        for sub in ptree.subtrees():
-            # Create grammar string from left-most node. E.g: NP -> JJ NP,
-            # in this case, JJ is left-most node
-            grammar_str = build_grammar_str_from_left_most(sub)
-            for src_grammar, tgt_grammar in grammar_substitutions.items():
-                if grammar_str == src_grammar:
-                    # Increment number of substitutions
-                    num_subs += 1
-                    # Calculate displacement between 2 grammar strings
-                    disp, new_words = calculate_displacement(src_grammar,tgt_grammar)
-                    # Change tree nodes positions thanks to new displacement
-                    swap_tree_given_left(sub, disp, new_words)
- 
-                
-    translated_grammar_sentence = " ".join(ptree.leaves())
-    return translated_grammar_sentence, num_subs
-
-def translate_sentence_words(sentence, src_to_tgt_dictionary):
-    words_list = []
-
-    for word in sentence.split():
-        target_word = src_to_tgt_dictionary.get(word,word)
-
-        if isinstance(target_word, list):
-            target_word = random.choice(target_word)
-        
-        words_list.append(target_word)
-
-    return ' '.join(words_list)
-
-def translate_trees_grammar(list_trees: List[nltk.Tree], src_to_tgt_grammar, src_to_tgt_dictionary):
-
-    # translated sentence map with number of grammar substitution found
-    trans_map = {}
-
-    for tree in list_trees:
-        # Translate grammar
-        trans_gram_sentence, num_subs = translate_tree_grammar(tree, src_to_tgt_grammar)
-
-        # Translate words
-        trans_lang_sentence = translate_sentence_words(trans_gram_sentence, src_to_tgt_dictionary)
-        
-        # Append to trans map
-        trans_map[trans_lang_sentence] = num_subs
-    # Return translation that has the most displacement
-    return max(trans_map, key=trans_map.get)
-
-def calculate_displacement(src_grammar, tgt_grammar):
-    """Calculate displacement between 2 grammar. E.g: S -> A B C to S -> B C A has displacement of [1 2 0]"""
-    src_grammar_lst = src_grammar.split()
-    tgt_grammar_lst = tgt_grammar.split()
-    
-    src_grammar_lst = src_grammar_lst[src_grammar_lst.index("->")+1:]
-    tgt_grammar_lst = tgt_grammar_lst[tgt_grammar_lst.index("->")+1:]
-
-    displacement = []
-    new_words = []
-
-    for word in tgt_grammar_lst:
-        try:
-          displacement.append(src_grammar_lst.index(word))
-        except ValueError:
-          # Resolve  ValueError: substring not found
-          # Which indicates this is a new word
-          displacement.append(-1)
-          new_words.append(word)
-
-    return displacement, new_words
\ No newline at end of file