"""Word Frequency Counter.

A simple script that counts the words in a given text document and prints
the most frequent words (the top 10 by default) together with their number
of occurrences.

Input:  path of the text file to be processed (asked for interactively).
Output: table of the most frequent words and their counts.

Features:
    - Output is in tabular format.
    - Case-insensitive processing of words.
    - Reports the words that occur most frequently, along with their counts.

Usage:
    python3 main.py
"""

import re
from collections import Counter

# A "word" is a maximal run of word characters (letters, digits, underscore).
_WORD_PATTERN = re.compile(r'\b\w+\b')

# Table layout: the word column is at least this wide and grows when needed.
_MIN_WORD_WIDTH = 15
_COUNT_WIDTH = 5


def find_words_frequency(file_path, top_n=10):
    """Print and return the most frequent words of a UTF-8 text file.

    Words are compared case-insensitively (the text is lower-cased before
    counting).

    Args:
        file_path: Path of the text file to be processed.
        top_n: How many of the most frequent words to report. Defaults to 10;
            ``None`` reports every distinct word.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent. The list is empty when the file contains no words.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8 text.
    """
    word_frequency = Counter()
    with open(file_path, 'r', encoding='utf-8') as file:
        # Count line by line so large files are never held in memory whole;
        # a word cannot span a line break, so the totals are unaffected.
        for line in file:
            word_frequency.update(_WORD_PATTERN.findall(line.lower()))

    most_common_words = word_frequency.most_common(top_n)
    if not most_common_words:
        print("No words found in the file.")
        return most_common_words

    # Widen the word column when a word is longer than the default width so
    # the counts stay aligned in one column.
    word_width = max(
        _MIN_WORD_WIDTH,
        max(len(word) for word, _ in most_common_words),
    )

    # Print in tabular format
    print(f"{'Word':<{word_width}} {'Count':<{_COUNT_WIDTH}}")
    print("-" * (word_width + _COUNT_WIDTH))
    for word, count in most_common_words:
        print(f"{word:<{word_width}} {count:<{_COUNT_WIDTH}}")

    return most_common_words


def main():
    """Ask for a file path and print that file's word-frequency table.

    Exits with a short error message and a non-zero status, instead of a
    traceback, when the file cannot be read or is not UTF-8 text.
    """
    # Tolerate stray whitespace and the quotes that shells and file managers
    # often put around pasted paths.
    file_path = input("Enter the path of file : ").strip().strip('"\'')
    try:
        find_words_frequency(file_path)
    except OSError as err:
        raise SystemExit(f"Error: could not read '{file_path}': {err}")
    except UnicodeDecodeError:
        raise SystemExit(f"Error: '{file_path}' is not a valid UTF-8 text file.")


if __name__ == "__main__":
    main()