diff --git a/gui-design/phrase_checker.ui b/gui-design/phrase_checker.ui
index 19d7eea..cc7ed53 100644
--- a/gui-design/phrase_checker.ui
+++ b/gui-design/phrase_checker.ui
@@ -27,7 +27,7 @@
Run
-
+
10
@@ -42,6 +42,9 @@
true
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
true
@@ -56,6 +59,9 @@
-
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
true
@@ -70,6 +76,9 @@
-
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
true
@@ -84,6 +93,9 @@
-
+
+ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter
+
true
diff --git a/src/phrase_checker.py b/src/phrase_checker.py
index df214a1..8e934f3 100644
--- a/src/phrase_checker.py
+++ b/src/phrase_checker.py
@@ -1,3 +1,5 @@
+# coding: utf-8
+
import sys
import csv
import os
@@ -5,6 +7,7 @@
import common
import phrase_checker_gui
from PySide6 import QtWidgets
+import yaml
class MainWindow(QtWidgets.QMainWindow, phrase_checker_gui.Ui_MainWindow):
@@ -15,11 +18,29 @@ def __init__(self):
self.output_folder_path = ''
self.phrases_file_path = ''
self.words_file_path = ''
+ self.config_file_path = ''
self.pushButton_input_folder_path.clicked.connect(self.on_pushButton_input_folder_path_clicked)
self.pushButton_output_folder_path.clicked.connect(self.on_pushButton_output_folder_path_clicked)
self.pushButton_phrases_file_path.clicked.connect(self.on_pushButton_phrases_file_path_clicked)
self.pushButton_words_file_path.clicked.connect(self.on_pushButton_words_file_path_clicked)
self.pushButton_run.clicked.connect(self.on_pushButton_run_clicked)
+ if len(sys.argv) > 1 and os.path.isfile(sys.argv[1]) and sys.argv[1].split('.')[-1] == 'yaml':
+ self.load_config(sys.argv[1])
+
+ def load_config(self, config_file_path):
+ with open(config_file_path, 'r') as stream:
+ try:
+ config = yaml.safe_load(stream)
+ self.input_folder_path = config['input_folder_path']
+ self.lineEdit_input_folder_path.setText(self.input_folder_path)
+ self.output_folder_path = config['output_folder_path']
+ self.lineEdit_output_folder_path.setText(self.output_folder_path)
+ self.phrases_file_path = config['phrases_file_path']
+ self.lineEdit_phrases_file_path.setText(self.phrases_file_path)
+ self.words_file_path = config['words_file_path']
+ self.lineEdit_words_file_path.setText(self.words_file_path)
+ except yaml.YAMLError as exc:
+ print(exc)
def on_pushButton_input_folder_path_clicked(self):
self.input_folder_path = QtWidgets.QFileDialog.getExistingDirectory(self,
@@ -48,6 +69,10 @@ def on_pushButton_words_file_path_clicked(self):
self.lineEdit_words_file_path.setText(self.words_file_path)
def on_pushButton_run_clicked(self):
+ with open(self.output_folder_path + '/config.yaml', 'w') as yaml_file:
+ yaml.dump({'input_folder_path': self.input_folder_path, 'output_folder_path': self.output_folder_path,
+ 'phrases_file_path': self.phrases_file_path, 'words_file_path': self.words_file_path}, yaml_file,
+ default_flow_style=False, allow_unicode=True)
main([self.input_folder_path, self.output_folder_path, self.phrases_file_path, self.words_file_path])
@@ -57,7 +82,7 @@ def get_count_in_list(elements_ut, list_ut):
out_dict = dict.fromkeys(elements_ut_lower)
for elem_ut in elements_ut_lower:
out_dict[elem_ut] = list_ut_lower.count(elem_ut)
- return dict(sorted(out_dict.items(), key=lambda kv: kv[1], reverse=True))
+ return out_dict
# TODO Make sure substring isn't enclosed by chars
@@ -67,31 +92,40 @@ def get_count_in_string(elements_ut, string_ut):
out_dict = dict.fromkeys(elements_ut_lower)
for elem_ut in elements_ut_lower:
out_dict[elem_ut] = string_ut_lower.count(elem_ut)
- return dict(sorted(out_dict.items(), key=lambda kv: kv[1], reverse=True))
+ return out_dict
def get_list_from_csv_first_row(csv_file):
with open(csv_file, newline='') as csvfile:
csvreader = csv.reader(csvfile, delimiter=',', quotechar='|')
row_1 = next(csvreader)
- return row_1
+ row_1_lower = [e.lower() for e in row_1]
+ return row_1_lower
-def write_count_dict(cv_in, dict_ut):
- with open(cv_in, 'w', newline='') as csvfile:
- writer = csv.DictWriter(csvfile, fieldnames=dict_ut.keys())
+def write_count_dict(csv_path, list_of_dicts, text_paths):
+ with open(csv_path, 'w', newline='') as csvfile:
+ header = list(list_of_dicts[0].keys())
+ header.append('text_path')
+ writer = csv.DictWriter(csvfile, fieldnames=header)
writer.writeheader()
- writer.writerows([dict_ut])
+ for idx, specific_dict in enumerate(list_of_dicts):
+ specific_dict['text_path'] = text_paths[idx]
+ writer.writerow(specific_dict)
def input_handling(argv):
+ # argv = [self.input_folder_path, self.output_folder_path, self.phrases_file_path, self.words_file_path]
files_to_check = []
- # TODO is this recursive?
- for file in os.listdir(argv[0]):
- if not (file.endswith(".doc") or file.endswith(".docx") or file.endswith("pdf") or file.endswith(".txt")): # ANY (?!)
- continue
- else:
- files_to_check.append(os.path.join(argv[0], file))
+ for folder, subs, filenames in os.walk(argv[0]):
+ for filename in filenames:
+ if not (filename.endswith(".doc") or filename.endswith(".docx") or filename.endswith("pdf") or filename.endswith(".txt")): # ANY (?!)
+ continue
+ else:
+ files_to_check.append(os.path.join(argv[0], os.path.join(folder, filename)))
+ if len(files_to_check) == 0:
+ print("No files to check")
+ sys.exit()
return files_to_check, argv[1], argv[2], argv[3]
@@ -130,66 +164,51 @@ def console_out(phrases_dict, words_dict, word_count):
def main(argv):
+ # argv = [self.input_folder_path, self.output_folder_path, self.phrases_file_path, self.words_file_path]
# If valid, fetch path to text and input list
text_paths, out_dir, phrases_path, words_path = input_handling(argv)
+ # Fetch list of bad phrases from provided csv file
+ phrases_list = sorted(get_list_from_csv_first_row(phrases_path))
+
+ # Fetch list of bad words from provided csv file
+ words_list = sorted(get_list_from_csv_first_row(words_path))
+
+ # Aggregate bad phrases and words in one dict
+ phrases_dicts_list = []
+ words_dicts_list = []
+
# Fetch full text of file in local string
for text_path in text_paths:
- # Make output directory
- # TODO consolidate with plag checker
- out_dir_file = os.path.join(out_dir, os.path.basename(text_path))
- try:
- os.makedirs(out_dir_file, exist_ok=True)
- except Exception as e:
- print('Error making output path.')
- sys.exit()
-
full_text_ut = common.get_string_from_path(text_path)
# Early out if doc empty
if not full_text_ut:
- print('Document under test is empty. Provide link to a document that is not empty.')
- sys.exit()
+ print('Error reading file: {}'.format(text_path))
+ continue
# TODO: Check if no 'space' within any entry of list
- # Fetch list of bad phrases from provided csv file
- phrases_list = get_list_from_csv_first_row(phrases_path)
+ # ??
# Get count of bad phrases as absolute counts within full text
- phrases_dict = get_count_in_string(phrases_list, full_text_ut)
-
- # Fetch list of bad words from provided csv file
- words_list = get_list_from_csv_first_row(words_path)
+ phrases_counts = get_count_in_string(phrases_list, full_text_ut)
+ phrases_dicts_list.append(phrases_counts)
# Fetch list of individual words within doc ut
single_words_within_txt_ut = extract_words_only_from_string(full_text_ut)
# Get count of bad words as absolute counts within list of words
- words_dict = get_count_in_list(words_list, single_words_within_txt_ut)
+ words_counts = get_count_in_list(words_list, single_words_within_txt_ut)
+ words_dicts_list.append(words_counts)
- # Write output dicts to csv
- write_count_dict(out_dir_file + '/phrases.csv', phrases_dict)
- write_count_dict(out_dir_file + '/words.csv', words_dict)
-
- # Write console output
- console_out(phrases_dict, words_dict, len(single_words_within_txt_ut))
+ # Write output dicts to csv
+ write_count_dict(out_dir + '/phrases.csv', phrases_dicts_list, text_paths)
+ write_count_dict(out_dir + '/words.csv', words_dicts_list, text_paths)
if __name__ == "__main__":
app = QtWidgets.QApplication(sys.argv)
- #parser = argparse.ArgumentParser(
- # description='Test a text document for excessive use of words or phrases that should be avoided')
- #parser.add_argument('doc', help='Path to document under test')
- #parser.add_argument('out', help='Path to output folder')
- #parser.add_argument('phrases', help='Link to phrases csv file that shall be tested')
- #parser.add_argument('words', help='Link to words csv file that shall be tested')
- #args = parser.parse_args()
- #main(sys.argv[1:])
-
mainwindow = MainWindow()
mainwindow.show()
sys.exit(app.exec())
- #mainwindow.show()
- #cfg_file_ext = os.path.splitext(app.lineEdit_input_folder)[1]
- #main(sys.argv[1:])
diff --git a/src/phrase_checker_gui.py b/src/phrase_checker_gui.py
index 6b0647e..7fd0e1a 100644
--- a/src/phrase_checker_gui.py
+++ b/src/phrase_checker_gui.py
@@ -29,53 +29,57 @@ def setupUi(self, MainWindow):
self.pushButton_run = QPushButton(self.centralwidget)
self.pushButton_run.setObjectName(u"pushButton_run")
self.pushButton_run.setGeometry(QRect(120, 200, 75, 24))
- self.widget = QWidget(self.centralwidget)
- self.widget.setObjectName(u"widget")
- self.widget.setGeometry(QRect(10, 10, 281, 171))
- self.gridLayout = QGridLayout(self.widget)
+ self.layoutWidget = QWidget(self.centralwidget)
+ self.layoutWidget.setObjectName(u"layoutWidget")
+ self.layoutWidget.setGeometry(QRect(10, 10, 281, 171))
+ self.gridLayout = QGridLayout(self.layoutWidget)
self.gridLayout.setObjectName(u"gridLayout")
self.gridLayout.setContentsMargins(0, 0, 0, 0)
- self.lineEdit_input_folder_path = QLineEdit(self.widget)
+ self.lineEdit_input_folder_path = QLineEdit(self.layoutWidget)
self.lineEdit_input_folder_path.setObjectName(u"lineEdit_input_folder_path")
self.lineEdit_input_folder_path.setEnabled(True)
+ self.lineEdit_input_folder_path.setAlignment(Qt.AlignRight|Qt.AlignTrailing|Qt.AlignVCenter)
self.lineEdit_input_folder_path.setReadOnly(True)
self.gridLayout.addWidget(self.lineEdit_input_folder_path, 0, 0, 1, 1)
- self.pushButton_input_folder_path = QPushButton(self.widget)
+ self.pushButton_input_folder_path = QPushButton(self.layoutWidget)
self.pushButton_input_folder_path.setObjectName(u"pushButton_input_folder_path")
self.gridLayout.addWidget(self.pushButton_input_folder_path, 0, 1, 1, 1)
- self.lineEdit_output_folder_path = QLineEdit(self.widget)
+ self.lineEdit_output_folder_path = QLineEdit(self.layoutWidget)
self.lineEdit_output_folder_path.setObjectName(u"lineEdit_output_folder_path")
+ self.lineEdit_output_folder_path.setAlignment(Qt.AlignRight|Qt.AlignTrailing|Qt.AlignVCenter)
self.lineEdit_output_folder_path.setReadOnly(True)
self.gridLayout.addWidget(self.lineEdit_output_folder_path, 1, 0, 1, 1)
- self.pushButton_output_folder_path = QPushButton(self.widget)
+ self.pushButton_output_folder_path = QPushButton(self.layoutWidget)
self.pushButton_output_folder_path.setObjectName(u"pushButton_output_folder_path")
self.gridLayout.addWidget(self.pushButton_output_folder_path, 1, 1, 1, 1)
- self.lineEdit_phrases_file_path = QLineEdit(self.widget)
+ self.lineEdit_phrases_file_path = QLineEdit(self.layoutWidget)
self.lineEdit_phrases_file_path.setObjectName(u"lineEdit_phrases_file_path")
+ self.lineEdit_phrases_file_path.setAlignment(Qt.AlignRight|Qt.AlignTrailing|Qt.AlignVCenter)
self.lineEdit_phrases_file_path.setReadOnly(True)
self.gridLayout.addWidget(self.lineEdit_phrases_file_path, 2, 0, 1, 1)
- self.pushButton_phrases_file_path = QPushButton(self.widget)
+ self.pushButton_phrases_file_path = QPushButton(self.layoutWidget)
self.pushButton_phrases_file_path.setObjectName(u"pushButton_phrases_file_path")
self.gridLayout.addWidget(self.pushButton_phrases_file_path, 2, 1, 1, 1)
- self.lineEdit_words_file_path = QLineEdit(self.widget)
+ self.lineEdit_words_file_path = QLineEdit(self.layoutWidget)
self.lineEdit_words_file_path.setObjectName(u"lineEdit_words_file_path")
+ self.lineEdit_words_file_path.setAlignment(Qt.AlignRight|Qt.AlignTrailing|Qt.AlignVCenter)
self.lineEdit_words_file_path.setReadOnly(True)
self.gridLayout.addWidget(self.lineEdit_words_file_path, 3, 0, 1, 1)
- self.pushButton_words_file_path = QPushButton(self.widget)
+ self.pushButton_words_file_path = QPushButton(self.layoutWidget)
self.pushButton_words_file_path.setObjectName(u"pushButton_words_file_path")
self.gridLayout.addWidget(self.pushButton_words_file_path, 3, 1, 1, 1)