-
Notifications
You must be signed in to change notification settings - Fork 3
/
data.py
104 lines (79 loc) · 3.56 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
from keras.utils import np_utils
import config
import glob
import os
def prepare_sequences(conditions=None):
    """Prepare (or load from cache) the sequences used by the neural network.

    Song files under ``data/individual_songs/`` whose path contains at least
    one of ``conditions`` are concatenated into a single stream of notes. The
    stream is cut into sliding windows of ``config.SEQUENCE_LENGTH`` notes
    (the network input), each paired with the note that follows the window
    (the network output). The result is stored as ``.npy`` files in
    ``data/<conditions joined with "_">`` (``data/all`` when no condition is
    given); when that directory already exists the files are loaded instead
    of being recomputed.

    Args:
        conditions: Substrings used to select song files. ``None`` or an
            empty sequence selects every song.

    Returns:
        A tuple ``(network_input, network_output, n_vocab, pitchnames,
        unnormalized_network_input)`` where

        * ``network_input`` has shape ``(n_patterns, sequence_length, 1)``
          and holds the integer-encoded notes divided by ``n_vocab``;
        * ``network_output`` is the one-hot encoding of the note following
          each window, ``n_vocab`` columns wide;
        * ``n_vocab`` is the number of distinct notes in the "all" dataset;
        * ``pitchnames`` is the sorted vocabulary; the position of a note in
          it is the integer the note is encoded as;
        * ``unnormalized_network_input`` holds the integer-encoded windows.

        When the dataset is read from the cache, ``pitchnames`` and
        ``unnormalized_network_input`` are numpy arrays rather than lists.

    Raises:
        ValueError: If the selected songs contain too few notes to build a
            single sequence.
    """
    conditions = [] if conditions is None else list(conditions)
    conditions_string = "_".join(conditions) or "all"
    data_dir = "data/" + conditions_string

    # Fast path: this dataset was already prepared, just read it back.
    if os.path.exists(data_dir):
        network_input = np.load(data_dir + "/network_input.npy")
        network_output = np.load(data_dir + "/network_output.npy")
        n_vocab = int(np.load(data_dir + "/n_vocab.npy"))
        pitchnames = np.load(data_dir + "/pitchnames.npy")
        unnormalized_network_input = np.load(data_dir + "/unormalized.npy")
        return (network_input, network_output, n_vocab, pitchnames,
                unnormalized_network_input)

    print("Create dir data/" + conditions_string + ".")
    sequence_length = config.SEQUENCE_LENGTH

    # Without any condition every song is used; otherwise a song is kept as
    # soon as one condition occurs in its path. Sorting keeps the order of
    # the concatenated notes independent of the order glob returns files in.
    filenames = [
        filename
        for filename in sorted(glob.glob("data/individual_songs/*"))
        if not conditions or any(cond in filename for cond in conditions)
    ]
    print("Number of songs matching the conditions: " + repr(len(filenames)))

    notes = []
    for filename in filenames:
        notes.extend(np.load(filename))

    if conditions_string == "all":
        # get all pitch names
        pitchnames = sorted(set(notes))
        n_vocab = len(pitchnames)
    else:
        # Conditioned datasets reuse the vocabulary of the full dataset so
        # that every dataset shares one integer encoding.
        _, _, n_vocab, pitchnames, _ = prepare_sequences()

    n_patterns = len(notes) - sequence_length
    if n_patterns <= 0:
        # Fail before anything is written, so no empty cache directory is
        # left behind to be picked up by later calls.
        raise ValueError(
            "Not enough notes (" + repr(len(notes)) + ") to build a sequence "
            "of length " + repr(sequence_length) + " for conditions '"
            + conditions_string + "'.")

    # create a dictionary to map pitches to integers
    note_to_int = {note: number for number, note in enumerate(pitchnames)}

    # Encode each note once, then slice the windows out of the encoded list.
    encoded_notes = [note_to_int[note] for note in notes]

    # create input sequences and the corresponding outputs
    unnormalized_network_input = [
        encoded_notes[start:start + sequence_length]
        for start in range(n_patterns)
    ]
    network_output = encoded_notes[sequence_length:]

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(
        unnormalized_network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)

    # Fix the one-hot width to the vocabulary size; otherwise it would
    # depend on the largest target index that happens to occur.
    network_output = np_utils.to_categorical(
        network_output, num_classes=n_vocab)

    os.makedirs(data_dir)
    np.save(data_dir + "/network_input.npy", network_input)
    np.save(data_dir + "/network_output.npy", network_output)
    np.save(data_dir + "/n_vocab.npy", n_vocab)
    np.save(data_dir + "/pitchnames.npy", pitchnames)
    np.save(data_dir + "/unormalized.npy", unnormalized_network_input)

    return (network_input, network_output, n_vocab, pitchnames,
            unnormalized_network_input)