This repository has been archived by the owner on May 3, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 27
/
waveform.go
307 lines (253 loc) · 9 KB
/
waveform.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
// Package waveform is capable of generating waveform images from audio streams. MIT Licensed.
package waveform
import (
"image"
"image/color"
"io"
"math"
"azul3d.org/engine/audio"
// Import WAV and FLAC decoders
_ "azul3d.org/engine/audio/flac"
_ "azul3d.org/engine/audio/wav"
)
const (
// imgYDefault is the height of the generated waveform image before the
// Y-axis scale is applied; the final height is imgYDefault multiplied by
// the Waveform's scaleY.
imgYDefault = 128
// scaleDefault is the default scaling factor. Each computed value is
// multiplied by the image height and by this factor to obtain the height of
// its bar. When clipping scaling is enabled, the factor is reduced in steps
// of 0.25 according to the largest computed value.
scaleDefault = 3.00
)
// Error values from azul3d/engine/audio are re-exported under this package's
// name, so that callers do not have to import an additional package to check
// for common errors. Each variable holds the very same error value as its
// audio counterpart, so comparing against either one is equivalent.
var (
// ErrFormat is returned when the input audio format is not a registered format
// with the audio package.
ErrFormat = audio.ErrFormat
// ErrInvalidData is returned when the input audio format is recognized, but
// the stream is invalid or corrupt in some way.
ErrInvalidData = audio.ErrInvalidData
// ErrUnexpectedEOS is returned when end-of-stream is encountered in the middle
// of a fixed-size block or data structure.
ErrUnexpectedEOS = audio.ErrUnexpectedEOS
)
// Waveform is a struct which can be manipulated and used to generate
// audio waveform images from an input audio stream.
//
// A Waveform should be created with New, which fills in working defaults for
// every field before applying options.
type Waveform struct {
// r is the input audio stream handed to the audio decoder.
r io.Reader
// resolution divides the per-read sample buffer (sample rate times channel
// count), i.e. it is the number of values computed per second of audio.
// It must be non-zero.
resolution uint
// sampleFn reduces each buffer of decoded samples to a single value.
// It must be non-nil.
sampleFn SampleReduceFunc
// bgColorFn selects the color of every background pixel.
bgColorFn ColorFunc
// fgColorFn selects the color of every waveform (foreground) pixel.
fgColorFn ColorFunc
// scaleX is the number of pixel columns drawn per computed value.
scaleX uint
// scaleY multiplies the default image height.
scaleY uint
// sharpness multiplies the per-column vertical adjustment applied on either
// side of the peak column when scaleX spreads a value over several columns.
sharpness uint
// scaleClipping, when true, lowers the drawing scale factor according to
// the largest computed value.
scaleClipping bool
}
// Generate immediately opens and reads an input audio stream, computes
// the values required for waveform generation, and returns a waveform image
// which is customized by zero or more, variadic, OptionsFunc parameters.
//
// Generate is equivalent to calling New, followed by the Compute and Draw
// methods of a Waveform struct. In general, Generate should only be used
// for one-time waveform image generation.
//
// If applying the options or computing the values fails, the returned image
// is nil and the error describes the failure; no image is drawn.
func Generate(r io.Reader, options ...OptionsFunc) (image.Image, error) {
	w, err := New(r, options...)
	if err != nil {
		return nil, err
	}

	values, err := w.Compute()
	if err != nil {
		// Do not draw from failed input: an empty, non-nil image next to a
		// non-nil error is misleading for callers.
		return nil, err
	}

	return w.Draw(values), nil
}
// New creates a Waveform which reads from r, starting from a set of working
// defaults and then applying each input OptionsFunc in order.
//
// The Waveform is returned together with any error reported while applying
// the options.
func New(r io.Reader, options ...OptionsFunc) (*Waveform, error) {
	w := new(Waveform)

	// Input stream to decode
	w.r = r

	// One computed value per second of audio, reduced with RMSF64Samples
	w.resolution = 1
	w.sampleFn = RMSF64Samples

	// Solid white background with a solid black waveform on top
	w.bgColorFn = SolidColor(color.White)
	w.fgColorFn = SolidColor(color.Black)

	// No scaling on either axis, normal sharpness
	w.scaleX = 1
	w.scaleY = 1
	w.sharpness = 1

	// Leave clipping values unscaled
	w.scaleClipping = false

	// Options are applied last so that they override the defaults above
	err := w.SetOptions(options...)
	return w, err
}
// Compute reads the Waveform's input audio stream and reduces it to a slice
// of float64 values using the configured sample function.
//
// Compute is typically called once per audio stream. The returned values can
// then be passed to Draw any number of times, to render differently
// customized waveform images without decoding the stream again.
func (w *Waveform) Compute() ([]float64, error) {
	values, err := w.readAndComputeSamples()
	return values, err
}
// Draw renders a slice of float64 values, as returned by Compute, into a new
// image.Image.
//
// Because Draw only depends on the values and on the Waveform's current
// options, it may be called repeatedly with the same values to produce
// differently customized images.
func (w *Waveform) Draw(values []float64) image.Image {
	img := w.generateImage(values)
	return img
}
// readAndComputeSamples opens an audio decoder on the input stream, reads it
// in chunks sized by the configured resolution, and reduces each chunk to a
// single value with the configured SampleReduceFunc.
//
// It returns one value per chunk that contained at least one sample. Only the
// samples actually delivered by a read are passed to the reduce function, so
// a short final chunk is not mixed with leftovers from the previous read, and
// a read which yields nothing but end-of-stream adds no value.
//
// Errors from the decoder are returned unchanged; this includes ErrFormat,
// ErrInvalidData and ErrUnexpectedEOS, which are the same values as their
// counterparts in the audio package.
func (w *Waveform) readAndComputeSamples() ([]float64, error) {
	// Validate struct members.
	// These checks are also done when applying options, but verifying them here
	// prevents a runtime panic if called on an empty Waveform instance.
	if w.sampleFn == nil {
		return nil, errSampleFunctionNil
	}
	if w.resolution == 0 {
		return nil, errResolutionZero
	}

	// Open audio decoder on input stream. The package-level error variables
	// alias the audio package's errors, so no translation is needed.
	decoder, _, err := audio.NewDecoder(w.r)
	if err != nil {
		return nil, err
	}

	// Size the read buffer so that resolution chunks cover one second of
	// audio across all channels. A resolution above the number of samples per
	// second would yield a zero-length buffer, which cannot receive any
	// samples, so at least one sample is always read per chunk.
	config := decoder.Config()
	size := uint(config.SampleRate*config.Channels) / w.resolution
	if size == 0 {
		size = 1
	}
	samples := make(audio.Float64, size)

	// computed holds one reduced value per chunk of audio
	var computed []float64
	for {
		// On any error other than end-of-stream, return
		n, err := decoder.Read(samples)
		if err != nil && err != audio.EOS {
			return nil, err
		}

		// Reduce only the samples delivered by this read; anything past n is
		// left over from an earlier read (or still zero).
		if n > 0 {
			computed = append(computed, w.sampleFn(samples[:n]))
		}

		// On end of stream, stop reading values
		if err == audio.EOS {
			break
		}
	}

	return computed, nil
}
// generateImage renders a slice of computed values as a waveform image.
//
// Every computed value occupies scaleX pixel columns. For each value the
// columns are first filled with the background color, then a bar whose height
// is proportional to the value is drawn with the foreground color, centered
// vertically in the image. The image is len(computed)*scaleX pixels wide and
// imgYDefault*scaleY pixels high.
func (w *Waveform) generateImage(computed []float64) image.Image {
	// Options as plain ints, for pixel arithmetic
	colWidth := int(w.scaleX)
	rowScale := int(w.scaleY)
	sharp := int(w.sharpness)

	// Totals which are also handed to the color functions:
	//   - maxN: number of computed values
	//   - maxX: number of pixels on X-axis
	//   - maxY: number of pixels on Y-axis
	maxN := len(computed)
	maxX := maxN * colWidth
	maxY := imgYDefault * rowScale

	// Output image, with its height and vertical center
	img := image.NewRGBA(image.Rect(0, 0, maxX, maxY))
	height := img.Bounds().Max.Y
	heightF := float64(height)
	centerY := height / 2

	// Column, within a value's scaleX columns, which is drawn at full height.
	// NOTE: math.Ceil receives an already integral value here, so it has no
	// effect and the result is scaleX / 2, truncated.
	peak := int(math.Ceil(float64(w.scaleX)) / 2)

	// Scaling factor applied to every computed value. With ScaleClipping
	// enabled, the factor drops by 0.25 for each 0.05 step that the largest
	// computed value lies above 0.30, to reduce clipping. This is a rough
	// estimate.
	imgScale := scaleDefault
	if w.scaleClipping {
		loudest := 0.0
		for i := range computed {
			if computed[i] > loudest {
				loudest = computed[i]
			}
		}

		for threshold := 0.30; threshold < loudest; threshold += 0.05 {
			imgScale -= 0.25
		}
	}

	for n, value := range computed {
		// Leftmost pixel column belonging to this value
		x := n * colWidth

		// Bar height in pixels, and the rows it spans around the center
		barHeight := int(math.Floor(value * heightF * imgScale))
		top := centerY - barHeight/2
		bottom := barHeight + top

		// Background first, over the full height of every column
		for y := 0; y < height; y++ {
			for i := 0; i < colWidth; i++ {
				col := x + i
				img.Set(col, y, w.bgColorFn(n, col, y, maxN, maxX, maxY))
			}
		}

		// Then the bar itself, symmetrical above and below the center
		for y := top; y < bottom; y++ {
			upperHalf := y < centerY

			for i := 0; i < colWidth; i++ {
				// Columns away from the peak are shifted toward the center
				// line, by sharpness pixels per column of distance, so that a
				// scaled image appears smoother and less "blocky".
				var adjust int
				switch {
				case i < peak:
					adjust = (i - peak) * sharp
				case i > peak:
					adjust = (peak - i) * sharp
				}

				// The shift is mirrored on the top half to keep both halves
				// symmetrical
				if upperHalf {
					adjust = -adjust
				}

				// The foreground color function sees the adjusted coordinates
				col, row := x+i, y+adjust
				img.Set(col, row, w.fgColorFn(n, col, row, maxN, maxX, maxY))
			}
		}
	}

	return img
}