# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import six
from .initializer import Initializer, Xavier, Constant
from .regularizer import WeightDecayRegularizer

__all__ = [
    'ParamAttr',
    'WeightNormParamAttr',
]


class ParamAttr(object):
    """
    Parameter attributes object. To fine-tune the network training process, users
    can set a parameter's attributes to control training details such as the
    learning rate, regularization, trainability, model averaging and the method
    used to initialize the parameter.

    Args:
        name(str): The parameter's name. Default None.
        initializer(Initializer): The method to initialize this parameter. Default None.
        learning_rate(float): The parameter's learning rate. The effective learning
            rate during optimization is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
            Default 1.0.
        regularizer(WeightDecayRegularizer): Regularization factor. Default None.
        trainable(bool): Whether this parameter is trainable. Default True.
        gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
            gradient. Default None.
        do_model_average(bool): Whether this parameter should do model average.
            Default False.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            w_param_attrs = fluid.ParamAttr(name="fc_weight",
                                            learning_rate=0.5,
                                            regularizer=fluid.regularizer.L2Decay(1.0),
                                            trainable=True)
            x = fluid.layers.data(name='X', shape=[1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
    """

    def __init__(self,
                 name=None,
                 initializer=None,
                 learning_rate=1.0,
                 regularizer=None,
                 trainable=True,
                 gradient_clip=None,
                 do_model_average=False):
        self.name = name
        self.initializer = initializer
        self.learning_rate = learning_rate
        self.regularizer = regularizer
        self.trainable = trainable
        self.gradient_clip = gradient_clip
        self.model_average = do_model_average

    def _set_default_initializer(self, initializer):
        """
        Set the default initializer. The initializer should be one of Constant,
        Uniform, Normal, Xavier or MSRA.

        Args:
            initializer(Initializer): the initializer to set.

        Returns:
            None
        """
        if initializer is None:
            if self.initializer is None:
                raise ValueError("ParamAttr.initializer is not set")
            return

        if self.initializer is not None:
            return

        self.initializer = initializer

    def _set_default_param_initializer(self):
        """
        Set the default initializer for the parameter with Xavier.

        Args:
            None.

        Returns:
            None.
        """
        self._set_default_initializer(Xavier())

    def _set_default_bias_initializer(self):
        """
        Set the default initializer for the bias with Constant(0.0).

        Args:
            None.

        Returns:
            None.
        """
        self._set_default_initializer(Constant(0.0))

    @staticmethod
    def _to_attr(arg):
        """
        Create ParamAttr[s].

        Args:
            arg: Arguments used to initialize ParamAttr[s]. arg's type can be
                str, Initializer, float, WeightDecayRegularizer, BaseGradientClipAttr,
                bool, ParamAttr, or a list of the above types.

        Returns:
            ParamAttr[s]: ParamAttr[s] initialized with arg.

        Raises:
            TypeError: if arg cannot be used to initialize a ParamAttr.
        """
        if arg is None:
            return ParamAttr()
        elif isinstance(arg, list) or isinstance(arg, tuple):
            return [ParamAttr._to_attr(a) for a in arg]
        elif isinstance(arg, ParamAttr):
            return arg
        elif isinstance(arg, six.string_types):
            return ParamAttr(name=arg)
        elif isinstance(arg, Initializer):
            return ParamAttr(initializer=arg)
        elif isinstance(arg, WeightDecayRegularizer):
            return ParamAttr(regularizer=arg)
        elif isinstance(arg, bool):
            return ParamAttr._to_attr(None) if arg else False
        else:
            raise TypeError("{0} cast to ParamAttr".format(type(arg)))
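
    # Editorial illustration (not part of the original module) of how the
    # dispatch in _to_attr above maps common argument types; the names used
    # here are just examples:
    #
    #   ParamAttr._to_attr(None)        -> ParamAttr() with all defaults
    #   ParamAttr._to_attr("fc_w")      -> ParamAttr(name="fc_w")
    #   ParamAttr._to_attr(Xavier())    -> ParamAttr(initializer=Xavier())
    #   ParamAttr._to_attr(["w", "b"])  -> [ParamAttr(name="w"), ParamAttr(name="b")]
    #   ParamAttr._to_attr(True)        -> ParamAttr() (same as None)
    #   ParamAttr._to_attr(False)       -> False, passed through unchanged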

    def _to_kwargs(self, with_initializer=False):
        """
        Returns the attributes of this parameter.

        Args:
            with_initializer(bool): Whether to add the initializer attr.

        Returns:
            Parameter attributes(map): The attributes of this parameter.
        """
        kwargs = {
            'name': self.name,
            'optimize_attr': {
                'learning_rate': self.learning_rate
            },
            'regularizer': self.regularizer,
            'trainable': self.trainable,
            'gradient_clip_attr': self.gradient_clip,
            'model_average': self.model_average
        }
        if with_initializer:
            kwargs['initializer'] = self.initializer
        return kwargs
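
    # For illustration only (editorial note, not part of the original module):
    # with default attributes, _to_kwargs(with_initializer=True) returns a dict
    # shaped like
    #   {'name': None,
    #    'optimize_attr': {'learning_rate': 1.0},
    #    'regularizer': None,
    #    'trainable': True,
    #    'gradient_clip_attr': None,
    #    'model_average': False,
    #    'initializer': None}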


class WeightNormParamAttr(ParamAttr):
    """
    Used for weight normalization. Weight normalization is a reparameterization
    of the weight vectors in a neural network that decouples the length of those
    weight vectors from their direction. Weight normalization has been implemented
    as discussed in this paper: `Weight Normalization: A Simple Reparameterization
    to Accelerate Training of Deep Neural Networks
    <https://arxiv.org/pdf/1602.07868.pdf>`_.

    Args:
        dim(int): Dimension over which to compute the norm. Default None.
        name(str): The parameter's name. Default None.
        initializer(Initializer): The method to initialize this parameter. Default None.
        learning_rate(float): The parameter's learning rate. The effective learning
            rate during optimization is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
            Default 1.0.
        regularizer(WeightDecayRegularizer): Regularization factor. Default None.
        trainable(bool): Whether this parameter is trainable. Default True.
        gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
            gradient. Default None.
        do_model_average(bool): Whether this parameter should do model average.
            Default False.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32")
            fc = fluid.layers.fc(input=data,
                                 size=1000,
                                 param_attr=fluid.WeightNormParamAttr(
                                     dim=None,
                                     name='weight_norm_param'))
    """
    # List to record the parameters reparameterized by weight normalization.
    # If these parameters are treated as Variable rather than Parameter,
    # it can be used to discriminate these parameters and help to serialize
    # these parameters for inference.
    params_with_weight_norm = []

    def __init__(self,
                 dim=None,
                 name=None,
                 initializer=None,
                 learning_rate=1.0,
                 regularizer=None,
                 trainable=True,
                 gradient_clip=None,
                 do_model_average=False):
        super(WeightNormParamAttr, self).__init__(
            name=name,
            initializer=initializer,
            learning_rate=learning_rate,
            regularizer=regularizer,
            trainable=trainable,
            gradient_clip=gradient_clip,
            do_model_average=do_model_average)
        self.dim = dim
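

# Editorial sketch (not part of the original module): the reparameterization
# that WeightNormParamAttr configures, following the paper cited in the class
# docstring, rewrites a weight tensor w as
#
#     w = g * v / ||v||
#
# where g is a learned magnitude and v a learned direction, with the norm
# presumably taken along the dimension selected by `dim`. Decoupling magnitude
# from direction is what the cited paper proposes to accelerate training.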