# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=g-short-docstring-punctuation
"""## Higher level ops related to regularization and building layers.
This package provides several ops that take care of creating variables that are
used internally in a consistent way and provide the building blocks for many
common machine learning algorithms.
@@convolution2d
@@fully_connected
## Regularizers
Regularization can help prevent overfitting.
These have the signature `fn(weights)`. The loss is typically added to
`tf.GraphKeys.REGULARIZATION_LOSS`
@@l1_regularizer
@@l2_regularizer
## Initializations
This also includes a common initialization for connecting multiple layers.
@@xavier_initializer
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import numbers

from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import nn
from tensorflow.python.ops import standard_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.platform import logging

__all__ = ['xavier_initializer', 'fully_connected', 'convolution2d',
           'l1_regularizer', 'l2_regularizer']


def xavier_initializer(n_inputs, n_outputs, uniform=True):
  """Returns an initializer performing "Xavier" initialization for weights.

  This method is described in the paper:

  Xavier Glorot and Yoshua Bengio (2010):
  Understanding the difficulty of training deep feedforward neural
  networks. International conference on artificial intelligence and
  statistics.

  It is designed to keep the scale of the gradients roughly the same in all
  layers. For a uniform distribution this ends up being the range `[-x, x]`
  with `x = sqrt(6. / (in + out))`; for a truncated normal distribution a
  standard deviation of `sqrt(3. / (in + out))` is used.

  Args:
    n_inputs: The number of input nodes into each output.
    n_outputs: The number of output nodes for each input.
    uniform: If true use a uniform distribution, otherwise use a truncated
      normal.

  Returns:
    An initializer.
  """
if uniform:
# 6 was used in the paper.
init_range = math.sqrt(6.0 / (n_inputs + n_outputs))
return standard_ops.random_uniform_initializer(-init_range, init_range)
else:
# 3 gives us approximately the same limits as above since this repicks
# values greater than 2 standard deviations from the mean.
stddev = math.sqrt(3.0 / (n_inputs + n_outputs))
return standard_ops.truncated_normal_initializer(stddev=stddev)
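
# A minimal usage sketch (illustrative shapes and names, not part of this
# module's API): the returned initializer plugs directly into `get_variable`.
#
#   init = xavier_initializer(n_inputs=784, n_outputs=256)
#   w = variable_scope.get_variable('weights', shape=[784, 256],
#                                   initializer=init)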


def _assert_summary_tag_unique(tag):
  """Raises ValueError if `tag` is already used by a summary in the graph."""
for summary in ops.get_collection(ops.GraphKeys.SUMMARIES):
old_tag = tensor_util.constant_value(summary.op.inputs[0])
if tag == str(old_tag):
raise ValueError('Conflict with summary tag: %s exists on summary %s %s' %
(tag, summary, old_tag))


def _add_scalar_summary(tensor, tag=None):
  """Adds a scalar summary operation for the tensor.

  Args:
    tensor: The tensor to summarize.
    tag: The tag to use; if None, use the tensor's op's name.

  Returns:
    The created scalar summary.

  Raises:
    ValueError: If the tag is already in use or the rank is not 0.
  """
tensor.get_shape().assert_has_rank(0)
tag = tag or tensor.op.name
_assert_summary_tag_unique(tag)
return standard_ops.scalar_summary(tag, tensor, name='%s_summary' % tag)


def _add_histogram_summary(tensor, tag=None):
  """Adds a histogram summary operation for the tensor.

  Args:
    tensor: The tensor to summarize.
    tag: The tag to use; if None, use the tensor's op's name.

  Returns:
    The created histogram summary.

  Raises:
    ValueError: If the tag is already in use.
  """
tag = tag or tensor.op.name
_assert_summary_tag_unique(tag)
return standard_ops.histogram_summary(tag, tensor, name='%s_summary' % tag)


def _apply_activation_with_summaries(x, activation_fn):
  """Returns activation_fn(x).

  This applies the given activation and adds useful summaries specific to the
  activation.

  Args:
    x: The tensor to apply the activation to.
    activation_fn: An activation function.

  Returns:
    A tensor with the activation applied to x.
  """
if activation_fn is None:
return x
y = activation_fn(x)
if activation_fn in (nn.relu, nn.softplus, nn.relu6):
# Using x for comparison to avoid floating point equality and/or epsilons.
_add_scalar_summary(
standard_ops.reduce_mean(standard_ops.to_float(standard_ops.less(
x, 0.0))), '%s/zeros' % y.op.name)
if activation_fn is nn.relu6:
_add_scalar_summary(
standard_ops.reduce_mean(standard_ops.to_float(standard_ops.greater(
x, 6.0))), '%s/sixes' % y.op.name)
if activation_fn is nn.l2_normalize:
_add_scalar_summary(
        standard_ops.reduce_mean(standard_ops.sqrt(standard_ops.reduce_sum(
            standard_ops.square(x), 1))), '%s/length' % y.op.name)
_add_histogram_summary(y, '%s/activations' % y.op.name)
return y


def _apply_regularization(w, regularizer):
  """Applies `regularizer` to `w` and collects any resulting loss."""
  loss = regularizer(w)
  if loss is not None:
    ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, loss)


def l1_regularizer(scale):
  """Returns a function that can be used to apply L1 regularization to weights.

  L1 regularization encourages sparsity.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

  Returns:
    A function with signature `l1(weights, name=None)` that applies L1
    regularization.

  Raises:
    ValueError: If scale is outside of the range [0.0, 1.0) or if scale is not
      a float.
  """
if isinstance(scale, numbers.Integral):
raise ValueError('scale cannot be an integer: %s' % scale)
if isinstance(scale, numbers.Real):
if scale < 0.:
raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
scale)
if scale >= 1.:
raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
scale)
if scale == 0.:
logging.info('Scale of 0 disables regularizer.')
return lambda _, name=None: None
def l1(weights, name=None):
"""Applies L1 regularization to weights."""
with ops.op_scope([weights], name, 'l1_regularizer') as scope:
my_scale = ops.convert_to_tensor(scale,
dtype=weights.dtype.base_dtype,
name='scale')
return standard_ops.mul(
my_scale,
standard_ops.reduce_sum(standard_ops.abs(weights)),
name=scope)
return l1
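
# A minimal usage sketch for `l1_regularizer` (the variable below is
# illustrative): the returned function maps a weights tensor to the scalar
# loss `scale * sum(|weights|)`.
#
#   regularizer = l1_regularizer(0.001)
#   weights = variable_scope.get_variable('w', shape=[10, 4])
#   loss = regularizer(weights)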


def l2_regularizer(scale):
  """Returns a function that can be used to apply L2 regularization to weights.

  Small values of L2 can help prevent overfitting the training data.

  Args:
    scale: A scalar multiplier `Tensor`. 0.0 disables the regularizer.

  Returns:
    A function with signature `l2(weights, name=None)` that applies L2
    regularization.

  Raises:
    ValueError: If scale is outside of the range [0.0, 1.0) or if scale is not
      a float.
  """
if isinstance(scale, numbers.Integral):
raise ValueError('scale cannot be an integer: %s' % (scale,))
if isinstance(scale, numbers.Real):
if scale < 0.:
raise ValueError('Setting a scale less than 0 on a regularizer: %g.' %
scale)
if scale >= 1.:
raise ValueError('Setting a scale greater than 1 on a regularizer: %g.' %
scale)
if scale == 0.:
logging.info('Scale of 0 disables regularizer.')
return lambda _, name=None: None
def l2(weights, name=None):
"""Applies l2 regularization to weights."""
with ops.op_scope([weights], name, 'l2_regularizer') as scope:
my_scale = ops.convert_to_tensor(scale,
dtype=weights.dtype.base_dtype,
name='scale')
return standard_ops.mul(my_scale, nn.l2_loss(weights), name=scope)
return l2
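
# A minimal sketch of folding the collected regularization losses into a total
# training loss (`data_loss` is a hypothetical task loss, not defined here):
#
#   reg_losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
#   total_loss = data_loss + standard_ops.add_n(reg_losses)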


def fully_connected(x,
num_output_nodes,
activation_fn=None,
weight_init=None,
bias_init=standard_ops.constant_initializer(0.),
num_input_nodes=None,
name=None,
weight_collections=None,
bias_collections=None,
weight_regularizer=None,
create_summaries=True):
"""Adds the parameters for a fully connected layer and returns the output.
A fully connected layer is generally defined as a matrix multiply:
\\\\(y = f(w * x + b)\\\\) where **f** is given by `activation_fn`
This op creates `w` and optionally `b` and adds various summaries that can be
useful for visualizing learning or diagnosing training problems. Bias can be
disabled by setting `bias_init` to `None`.
The variable creation is compatible with `tf.variable_scope` and so can be
reused with `tf.variable_scope` or `tf.make_template`.
In almost all cases, the number of input nodes can be inferred from the shape
of `x`, but if it is unspecified or additional size checks are desired, then
`num_input_nodes` can be specified.
Most of the details of variable creation can be controlled by specifying the
initializers (`weight_init` and `bias_init`) and which collections to place
the created variables in (`weight_collections` and `bias_collections`).
A per layer regularization can be specified by setting `weight_regularizer`.
This is only applied to weights and not the bias.
Args:
x: The input `Tensor`.
num_output_nodes: The size of the output.
activation_fn: A function that requires a single Tensor that is applied as a
non-linearity. If None is used, then this is a linear layer.
weight_init: An optional initialization. If not specified, uses Xavier
initialization (see `tf.learn.xavier_initializer`).
bias_init: An initializer for the bias, defaults to 0. Set to`None` in order
to disable bias.
num_input_nodes: The number of input nodes.
name: The name for this operation is used to name operations and to find
variables. If specified it must be unique for this scope, otherwise a
unique name starting with "fully_connected" will be created. See
`tf.variable_op_scope` for details.
weight_collections: List of graph collections for just weights.
bias_collections: List of graph collections for just bias.
weight_regularizer: A regularizer like the result of
`tf.learn.l1_regularizer` or `tf.learn.l2_regularizer`.
create_summaries: Set to false to disable summaries.
Returns:
The result of applying a fully connected layer.
Raises:
ValueError: if `x` is not rank 2; or `x`'s second dimension is not known
and `num_input_nodes` is not specified.
"""
with variable_scope.variable_op_scope([x], name, 'fully_connected') as vs:
# Check rank and if num_input_nodes is specified, make sure it matches.
x.get_shape().assert_is_compatible_with([None, num_input_nodes])
if not num_input_nodes:
if x.get_shape().dims is None or x.get_shape().dims[1].value is None:
raise ValueError(
'If x has an unknown second dimension then num_input_nodes '
'must be specified; shape: %s num_input_nodes: %s'
% (x.get_shape(), num_input_nodes))
else:
num_input_nodes = x.get_shape().dims[1].value
weight_init = weight_init or xavier_initializer(
num_input_nodes, num_output_nodes)
dtype = x.dtype.base_dtype
w = variable_scope.get_variable('weights',
shape=[num_input_nodes, num_output_nodes],
dtype=dtype,
initializer=weight_init,
collections=weight_collections)
if not vs.reuse and create_summaries:
_add_histogram_summary(w)
y = standard_ops.matmul(x, w)
# Regularization is only applied to the weights and not bias.
if weight_regularizer:
_apply_regularization(w, weight_regularizer)
if bias_init is not None:
b = variable_scope.get_variable('bias',
shape=[num_output_nodes],
dtype=dtype,
initializer=bias_init,
collections=bias_collections)
if not vs.reuse and create_summaries:
_add_histogram_summary(b)
y = nn.bias_add(y, b)
if create_summaries:
return _apply_activation_with_summaries(y, activation_fn)
    else:
      return activation_fn(y) if activation_fn else y
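
# A minimal sketch of stacking two fully connected layers (`images` is a
# hypothetical rank-2 input; sizes are illustrative):
#
#   hidden = fully_connected(images, 256, activation_fn=nn.relu,
#                            weight_regularizer=l2_regularizer(1e-4),
#                            name='hidden')
#   logits = fully_connected(hidden, 10, name='logits')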


def convolution2d(x,
num_output_channels,
kernel_size,
activation_fn=None,
stride=(1, 1),
padding='SAME',
weight_init=None,
bias_init=standard_ops.constant_initializer(0.),
num_input_channels=None,
name=None,
weight_collections=None,
bias_collections=None,
weight_regularizer=None,
create_summaries=True):
"""Adds the parameters for a conv2d layer and returns the output.
A neural network convolution layer is generally defined as:
\\\\(y = f(conv2d(w, x) + b)\\\\) where **f** is given by `activation_fn`,
**conv2d** is `tf.nn.conv2d` and `x` has shape
`[batch, height, width, channels]`
This op creates `w` and optionally `b` and adds various summaries that can be
useful for visualizing learning or diagnosing training problems. Bias can be
disabled by setting `bias_init` to `None`.
The variable creation is compatible with `tf.variable_scope` and so can be
reused with `tf.variable_scope` or `tf.make_template`.
In almost all cases, the input channels can be inferred from the shape
of `x`, but if it is unspecified or additional size checks are
desired, then `num_input_channels` can be specified.
Most of the details of variable creation can be controlled by specifying the
initializers (`weight_init` and `bias_init`) and which collections to place
the created variables in (`weight_collections` and `bias_collections`).
A per layer regularization can be specified by setting `weight_regularizer`.
This is only applied to weights and not the bias.
Args:
x: The input `Tensor`.
num_output_channels: The number of output channels (i.e. the size of
dim[3]).
kernel_size: A length 2 `list` or `tuple` containing the kernel size.
activation_fn: A function that requires a single Tensor that is applied as a
non-linearity.
stride: A length 2 `list` or `tuple` specifying the stride of the sliding
window across the image.
padding: A `string` from: "SAME", "VALID". The type of padding algorithm to
use.
weight_init: An optional initialization. If not specified, uses Xavier
initialization (see `tf.learn.xavier_initializer`).
bias_init: An initializer for the bias, defaults to 0. Set to`None` in order
to disable bias.
num_input_channels: The length of the channel dimension in the input.
name: The name for this operation is used to name operations and to find
variables. If specified it must be unique for this scope, otherwise a
unique name starting with "convolution2d" will be created. See
`tf.variable_op_scope` for details.
weight_collections: List of graph collections for just weights.
bias_collections: List of graph collections for just bias.
weight_regularizer: A regularizer like the result of
`tf.learn.l1_regularizer` or `tf.learn.l2_regularizer`.
create_summaries: Set to false to disable summaries.
Returns:
The result of applying a fully connected layer.
Raises:
ValueError: if `x` is not rank 4; or `x`'s channel dimension is not known
and `num_input_channels` is not specified.
"""
with variable_scope.variable_op_scope([x], name, 'convolution2d') as vs:
# Check rank and if num_input_channels is specified, make sure it matches.
x.get_shape().assert_is_compatible_with([None, None, None,
num_input_channels])
if not num_input_channels:
if x.get_shape().dims is None or x.get_shape().dims[3].value is None:
raise ValueError(
'If x has an unknown channels dimension then num_input_channels '
'must be specified; shape: %s num_input_channels: %s'
% (x.get_shape(), num_input_channels))
else:
num_input_channels = x.get_shape().dims[3].value
# QQQ: Should we accept a scalar for a square convolution?
    if len(kernel_size) != 2:
      raise ValueError('kernel_size must be length 2: %s' % (kernel_size,))
    if len(stride) != 2:
      raise ValueError('stride must be length 2: %s' % (stride,))
stride = [1, stride[0], stride[1], 1]
shape = [kernel_size[0], kernel_size[1], num_input_channels,
num_output_channels]
patch_size = kernel_size[0] * kernel_size[1]
weight_init = weight_init or xavier_initializer(
num_input_channels * patch_size, num_output_channels * patch_size)
dtype = x.dtype.base_dtype
w = variable_scope.get_variable('weights',
shape=shape,
dtype=dtype,
initializer=weight_init,
collections=weight_collections)
if not vs.reuse and create_summaries:
_add_histogram_summary(w)
y = nn.conv2d(x, w, stride, padding)
# Regularization is only applied to the weights and not bias.
if weight_regularizer:
_apply_regularization(w, weight_regularizer)
if bias_init is not None:
b = variable_scope.get_variable('bias',
shape=[num_output_channels],
dtype=dtype,
initializer=bias_init,
collections=bias_collections)
if not vs.reuse and create_summaries:
_add_histogram_summary(b)
y = nn.bias_add(y, b)
if create_summaries:
return _apply_activation_with_summaries(y, activation_fn)
    else:
      return activation_fn(y) if activation_fn else y
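
# A minimal sketch of a convolution followed by pooling (`images` is a
# hypothetical `[batch, height, width, channels]` input; sizes are
# illustrative):
#
#   conv1 = convolution2d(images, num_output_channels=32, kernel_size=[5, 5],
#                         activation_fn=nn.relu, name='conv1')
#   pool1 = nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
#                       padding='SAME')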