Skip to content

Commit

Permalink
added utils module
Browse files Browse the repository at this point in the history
  • Loading branch information
thushv89 committed Mar 18, 2019
1 parent 372e40e commit d25aaaa
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
Empty file added utils/__init__.py
Empty file.
23 changes: 23 additions & 0 deletions utils/data_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
import numpy as np


def read_data(filename):
    """Read a UTF-8 text file and return its lines as a list.

    The file is opened as plain text (not as an archive) and decoded as
    UTF-8. Each element of the result is one line of the file, with its
    trailing newline left in place; callers that need clean text should
    strip it themselves.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of strings, one per line, in file order. An empty file
        yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        # Iterating a text file yields one line at a time, so list(f) is
        # the same result as appending each row in a manual loop.
        return list(f)


def sents2sequences(tokenizer, sentences, reverse=False, pad_length=None, padding_type='post'):
    """Encode sentences with ``tokenizer`` and pad them to a common length.

    Args:
        tokenizer: Object providing ``texts_to_sequences(sentences)``
            (presumably a fitted Keras ``Tokenizer`` -- confirm with callers).
        sentences: The texts handed to ``tokenizer.texts_to_sequences``.
        reverse: When true, flip the padded result along axis 1.
        pad_length: Forwarded to ``pad_sequences`` as ``maxlen``.
        padding_type: Forwarded to ``pad_sequences`` as ``padding``.

    Returns:
        The output of ``pad_sequences``, flipped along axis 1 when
        ``reverse`` is true.
    """
    token_ids = tokenizer.texts_to_sequences(sentences)
    padded = pad_sequences(token_ids, padding=padding_type, maxlen=pad_length)

    if not reverse:
        return padded

    # The flip is applied to the already padded array, so the padded
    # positions change ends together with the tokens.
    return np.flip(padded, axis=1)

0 comments on commit d25aaaa

Please sign in to comment.