Skip to content
This repository was archived by the owner on Aug 3, 2021. It is now read-only.

Commit 4b95346

Browse files
authored
Merge pull request #269 from edwardhdlu/wavenet-pull-request
WaveNet Implementation
Merge commit 4b95346, with 2 parents: aad1ca1 + 8c79fa8

File tree

16 files changed

+1396
-6
lines changed

16 files changed

+1396
-6
lines changed
Lines changed: 103 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,103 @@
# pylint: skip-file
"""OpenSeq2Seq config: WaveNet text-to-speech on LJSpeech, single GPU, float32.

Defines the model/encoder/decoder/loss/data-layer wiring plus per-mode
(`train`/`eval`/`infer`/`interactive_infer`) data-layer overrides that the
OpenSeq2Seq runner reads at module level.
"""
import tensorflow as tf
from open_seq2seq.models import Text2SpeechWavenet
from open_seq2seq.encoders import WavenetEncoder
from open_seq2seq.decoders import FakeDecoder
from open_seq2seq.losses import WavenetLoss
from open_seq2seq.data import WavenetDataLayer
from open_seq2seq.optimizers.lr_policies import exp_decay
# NOTE(review): gated_linear_units appears unused in this config — kept in
# case external tooling relies on it being importable from here; confirm.
from open_seq2seq.parts.convs2s.utils import gated_linear_units

base_model = Text2SpeechWavenet

base_params = {
    "random_seed": 0,
    "use_horovod": False,
    "max_steps": 1000000,

    "num_gpus": 1,
    "batch_size_per_gpu": 2,

    "save_summaries_steps": 50,
    "print_loss_steps": 50,
    "print_samples_steps": 500,
    "eval_steps": 500,
    "save_checkpoint_steps": 2500,
    "logdir": "result/wavenet-LJ-float",

    "optimizer": "Adam",
    "optimizer_params": {},
    # Exponential LR decay: 1e-3 -> floor of 1e-5, starting at step 45000.
    "lr_policy": exp_decay,
    "lr_policy_params": {
        "learning_rate": 1e-3,
        "decay_steps": 20000,
        "decay_rate": 0.1,
        "use_staircase_decay": False,
        "begin_decay_at": 45000,
        "min_lr": 1e-5,
    },
    "dtype": tf.float32,
    "regularizer": tf.contrib.layers.l2_regularizer,
    "regularizer_params": {
        "scale": 1e-6
    },
    "initializer": tf.contrib.layers.xavier_initializer,

    "summaries": [],

    "encoder": WavenetEncoder,
    "encoder_params": {
        # 3 blocks x 10 dilated conv layers, mu-law quantized to 256 levels.
        "layer_type": "conv1d",
        "kernel_size": 3,
        "strides": 1,
        "padding": "VALID",
        "blocks": 3,
        "layers_per_block": 10,
        "filters": 64,
        "quantization_channels": 256
    },

    # WaveNet produces audio directly from the encoder; decoder is a no-op.
    "decoder": FakeDecoder,

    "loss": WavenetLoss,

    "data_layer": WavenetDataLayer,
    "data_layer_params": {
        "num_audio_features": 80,
        "dataset_location": "data/speech/LJSpeech/wavs/"
    }
}

train_params = {
    "data_layer_params": {
        "dataset_files": [
            "data/speech/LJSpeech/train.csv",
        ],
        "shuffle": True,
    },
}

eval_params = {
    "data_layer_params": {
        "dataset_files": [
            "data/speech/LJSpeech/val.csv",
        ],
        "shuffle": False,
    },
}

infer_params = {
    "data_layer_params": {
        "dataset_files": [
            "data/speech/LJSpeech/test.csv",
        ],
        "shuffle": False,
    },
}

interactive_infer_params = {
    "data_layer_params": {
        "dataset_files": [],
        "shuffle": False,
    },
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
# pylint: skip-file
"""OpenSeq2Seq config: WaveNet text-to-speech on LJSpeech, 8 GPUs via Horovod.

Same model topology as the single-GPU float config, but trained with Horovod
(batch size 1 per worker) against a partitioned LJSpeech dataset layout.
"""
import tensorflow as tf
from open_seq2seq.models import Text2SpeechWavenet
from open_seq2seq.encoders import WavenetEncoder
from open_seq2seq.decoders import FakeDecoder
from open_seq2seq.losses import WavenetLoss
from open_seq2seq.data import WavenetDataLayer
from open_seq2seq.optimizers.lr_policies import exp_decay
# NOTE(review): gated_linear_units appears unused in this config — kept in
# case external tooling relies on it being importable from here; confirm.
from open_seq2seq.parts.convs2s.utils import gated_linear_units

base_model = Text2SpeechWavenet

base_params = {
    "random_seed": 0,
    "use_horovod": True,
    "max_steps": 1000000,

    # NOTE(review): with use_horovod=True the worker count usually comes from
    # the horovod launcher, so "num_gpus" may be ignored — confirm against
    # the runner before relying on it.
    "num_gpus": 8,
    "batch_size_per_gpu": 1,

    "save_summaries_steps": 50,
    "print_loss_steps": 50,
    "print_samples_steps": 500,
    "eval_steps": 500,
    "save_checkpoint_steps": 2500,
    "logdir": "result/wavenet-LJ-float",

    "optimizer": "Adam",
    "optimizer_params": {},
    # Exponential LR decay: 1e-3 -> floor of 1e-5, starting at step 45000.
    "lr_policy": exp_decay,
    "lr_policy_params": {
        "learning_rate": 1e-3,
        "decay_steps": 20000,
        "decay_rate": 0.1,
        "use_staircase_decay": False,
        "begin_decay_at": 45000,
        "min_lr": 1e-5,
    },
    "dtype": tf.float32,
    "regularizer": tf.contrib.layers.l2_regularizer,
    "regularizer_params": {
        "scale": 1e-6
    },
    "initializer": tf.contrib.layers.xavier_initializer,

    "summaries": [],

    "encoder": WavenetEncoder,
    "encoder_params": {
        # 3 blocks x 10 dilated conv layers, mu-law quantized to 256 levels.
        "layer_type": "conv1d",
        "kernel_size": 3,
        "strides": 1,
        "padding": "VALID",
        "blocks": 3,
        "layers_per_block": 10,
        "filters": 64,
        "quantization_channels": 256
    },

    # WaveNet produces audio directly from the encoder; decoder is a no-op.
    "decoder": FakeDecoder,

    "loss": WavenetLoss,

    "data_layer": WavenetDataLayer,
    "data_layer_params": {
        "num_audio_features": 80,
        "dataset_location": "/data/LJSpeech-1.1-partitioned/wavs/"
    }
}

train_params = {
    "data_layer_params": {
        "dataset_files": [
            "/data/LJSpeech-1.1-partitioned/train.csv",
        ],
        "shuffle": True,
    },
}

eval_params = {
    "data_layer_params": {
        "dataset_files": [
            "/data/LJSpeech-1.1-partitioned/val.csv",
        ],
        "shuffle": False,
    },
}

infer_params = {
    "data_layer_params": {
        "dataset_files": [
            "/data/LJSpeech-1.1-partitioned/test.csv",
        ],
        "shuffle": False,
    },
}

interactive_infer_params = {
    "data_layer_params": {
        "dataset_files": [],
        "shuffle": False,
    },
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
# pylint: skip-file
"""OpenSeq2Seq config: WaveNet text-to-speech on LJSpeech, mixed precision.

Same topology as the float32 config but with dtype "mixed" plus backoff loss
scaling, which allows a larger per-GPU batch size (4 vs 2).
"""
import tensorflow as tf
from open_seq2seq.models import Text2SpeechWavenet
from open_seq2seq.encoders import WavenetEncoder
from open_seq2seq.decoders import FakeDecoder
from open_seq2seq.losses import WavenetLoss
from open_seq2seq.data import WavenetDataLayer
from open_seq2seq.optimizers.lr_policies import exp_decay
# NOTE(review): gated_linear_units appears unused in this config — kept in
# case external tooling relies on it being importable from here; confirm.
from open_seq2seq.parts.convs2s.utils import gated_linear_units

base_model = Text2SpeechWavenet

base_params = {
    "random_seed": 0,
    "use_horovod": False,
    "max_steps": 1000000,

    "num_gpus": 1,
    "batch_size_per_gpu": 4,

    "save_summaries_steps": 50,
    "print_loss_steps": 50,
    "print_samples_steps": 500,
    "eval_steps": 500,
    "save_checkpoint_steps": 2500,
    "logdir": "result/wavenet-LJ-mixed",

    "optimizer": "Adam",
    "optimizer_params": {},
    # Exponential LR decay: 1e-3 -> floor of 1e-5, starting at step 45000.
    "lr_policy": exp_decay,
    "lr_policy_params": {
        "learning_rate": 1e-3,
        "decay_steps": 20000,
        "decay_rate": 0.1,
        "use_staircase_decay": False,
        "begin_decay_at": 45000,
        "min_lr": 1e-5,
    },
    # Mixed-precision training with dynamic (backoff) loss scaling to avoid
    # fp16 gradient underflow.
    "dtype": "mixed",
    "loss_scaling": "Backoff",
    "regularizer": tf.contrib.layers.l2_regularizer,
    "regularizer_params": {
        "scale": 1e-6
    },
    "initializer": tf.contrib.layers.xavier_initializer,

    "summaries": [],

    "encoder": WavenetEncoder,
    "encoder_params": {
        # 3 blocks x 10 dilated conv layers, mu-law quantized to 256 levels.
        "layer_type": "conv1d",
        "kernel_size": 3,
        "strides": 1,
        "padding": "VALID",
        "blocks": 3,
        "layers_per_block": 10,
        "filters": 64,
        "quantization_channels": 256
    },

    # WaveNet produces audio directly from the encoder; decoder is a no-op.
    "decoder": FakeDecoder,

    "loss": WavenetLoss,

    "data_layer": WavenetDataLayer,
    "data_layer_params": {
        "num_audio_features": 80,
        "dataset_location": "data/speech/LJSpeech/wavs/"
    }
}

train_params = {
    "data_layer_params": {
        "dataset_files": [
            "data/speech/LJSpeech/train.csv",
        ],
        "shuffle": True,
    },
}

eval_params = {
    "data_layer_params": {
        "dataset_files": [
            "data/speech/LJSpeech/val.csv",
        ],
        "shuffle": False,
    },
}

infer_params = {
    "data_layer_params": {
        "dataset_files": [
            "data/speech/LJSpeech/test.csv",
        ],
        "shuffle": False,
    },
}

interactive_infer_params = {
    "data_layer_params": {
        "dataset_files": [],
        "shuffle": False,
    },
}

0 commit comments

Comments
 (0)