Skip to content

Commit 1dd313f

Browse files
Make dtype conversion more explicit.
Make x, y, shuffle, input_dtype and output_dtype immutable. Change: 129160426
1 parent bc66656 commit 1dd313f

8 files changed

Lines changed: 278 additions & 162 deletions

File tree

tensorflow/contrib/factorization/python/ops/kmeans_test.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,11 @@ def test_infer(self):
153153
def test_fit_with_cosine_distance(self):
154154
# Create points on y=x and y=1.5x lines to check the cosine similarity.
155155
# Note that euclidean distance will give different results in this case.
156-
points = np.array([[9, 9], [0.5, 0.5], [10, 15], [0.4, 0.6]])
156+
points = np.array(
157+
[[9, 9], [0.5, 0.5], [10, 15], [0.4, 0.6]], dtype=np.float32)
157158
# true centers are the unit vectors on lines y=x and y=1.5x
158-
true_centers = np.array([[0.70710678, 0.70710678], [0.5547002, 0.83205029]])
159+
true_centers = np.array(
160+
[[0.70710678, 0.70710678], [0.5547002, 0.83205029]], dtype=np.float32)
159161
kmeans = KMeans(2,
160162
initial_clusters=kmeans_ops.RANDOM_INIT,
161163
distance_metric=kmeans_ops.COSINE_DISTANCE,
@@ -168,8 +170,9 @@ def test_fit_with_cosine_distance(self):
168170
np.sort(true_centers, axis=0))
169171

170172
def test_transform_with_cosine_distance(self):
171-
points = np.array([[2.5, 3.5], [2, 8], [3, 1], [3, 18],
172-
[-2.5, -3.5], [-2, -8], [-3, -1], [-3, -18]])
173+
points = np.array(
174+
[[2.5, 3.5], [2, 8], [3, 1], [3, 18],
175+
[-2.5, -3.5], [-2, -8], [-3, -1], [-3, -18]], dtype=np.float32)
173176

174177
true_centers = [normalize(np.mean(normalize(points)[4:, :], axis=0,
175178
keepdims=True))[0],
@@ -193,16 +196,16 @@ def test_transform_with_cosine_distance(self):
193196
self.assertAllClose(transform, true_transform, atol=1e-3)
194197

195198
def test_predict_with_cosine_distance(self):
196-
points = np.array([[2.5, 3.5], [2, 8], [3, 1], [3, 18],
197-
[-2.5, -3.5], [-2, -8], [-3, -1], [-3, -18]]).astype(
198-
np.float32)
199+
points = np.array(
200+
[[2.5, 3.5], [2, 8], [3, 1], [3, 18],
201+
[-2.5, -3.5], [-2, -8], [-3, -1], [-3, -18]], dtype=np.float32)
199202
true_centers = np.array(
200203
[normalize(np.mean(normalize(points)[0:4, :],
201204
axis=0,
202205
keepdims=True))[0],
203206
normalize(np.mean(normalize(points)[4:, :],
204207
axis=0,
205-
keepdims=True))[0]])
208+
keepdims=True))[0]], dtype=np.float32)
206209
true_assignments = [0] * 4 + [1] * 4
207210
true_score = len(points) - np.tensordot(normalize(points),
208211
true_centers[true_assignments])
@@ -230,14 +233,14 @@ def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
230233
# the less populated centers.
231234
points = np.array([[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3],
232235
[-3.1, -3.2], [-2.8, -3.], [-2.9, -3.1], [-3., -3.1],
233-
[-3., -3.1], [-3.2, -3.], [-3., -3.]]).astype(np.float32)
236+
[-3., -3.1], [-3.2, -3.], [-3., -3.]], dtype=np.float32)
234237
true_centers = np.array(
235238
[normalize(np.mean(normalize(points)[0:2, :], axis=0,
236239
keepdims=True))[0],
237240
normalize(np.mean(normalize(points)[2:4, :], axis=0,
238241
keepdims=True))[0],
239242
normalize(np.mean(normalize(points)[4:, :], axis=0,
240-
keepdims=True))[0]])
243+
keepdims=True))[0]], dtype=np.float32)
241244
true_assignments = [0] * 2 + [1] * 2 + [2] * 8
242245
true_score = len(points) - np.tensordot(normalize(points),
243246
true_centers[true_assignments])
@@ -262,15 +265,15 @@ def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
262265
self.assertAllClose(score, true_score, atol=1e-2)
263266

264267
def test_fit_raise_if_num_clusters_larger_than_num_points_random_init(self):
265-
points = np.array([[2.0, 3.0], [1.6, 8.2]])
268+
points = np.array([[2.0, 3.0], [1.6, 8.2]], dtype=np.float32)
266269

267270
with self.assertRaisesOpError('less'):
268271
kmeans = KMeans(num_clusters=3, initial_clusters=kmeans_ops.RANDOM_INIT)
269272
kmeans.fit(x=points, steps=10, batch_size=8)
270273

271274
def test_fit_raise_if_num_clusters_larger_than_num_points_kmeans_plus_plus(
272275
self):
273-
points = np.array([[2.0, 3.0], [1.6, 8.2]])
276+
points = np.array([[2.0, 3.0], [1.6, 8.2]], dtype=np.float32)
274277

275278
with self.assertRaisesOpError(AssertionError):
276279
kmeans = KMeans(num_clusters=3,

tensorflow/contrib/learn/python/learn/estimators/classifier_test.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,10 @@
2929

3030
def iris_input_fn(num_epochs=None):
3131
iris = tf.contrib.learn.datasets.load_iris()
32-
features = tf.cast(
33-
tf.reshape(
34-
tf.constant(iris.data), [-1, 4]), tf.float32)
32+
features = tf.reshape(tf.constant(iris.data), [-1, 4])
3533
if num_epochs:
3634
features = tf.train.limit_epochs(features, num_epochs=num_epochs)
37-
target = tf.cast(
38-
tf.reshape(
39-
tf.constant(iris.target), [-1]), tf.int64)
35+
target = tf.reshape(tf.constant(iris.target), [-1])
4036
return features, target
4137

4238

tensorflow/contrib/learn/python/learn/estimators/estimator_test.py

Lines changed: 32 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -36,32 +36,26 @@
3636

3737
def boston_input_fn(num_epochs=None):
3838
boston = tf.contrib.learn.datasets.load_boston()
39-
features = tf.cast(
40-
tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM]), tf.float32)
39+
features = tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM])
4140
if num_epochs:
4241
features = tf.train.limit_epochs(features, num_epochs=num_epochs)
43-
target = tf.cast(
44-
tf.reshape(tf.constant(boston.target), [-1, 1]), tf.float32)
42+
target = tf.reshape(tf.constant(boston.target), [-1, 1])
4543
return features, target
4644

4745

4846
def iris_input_fn():
4947
iris = tf.contrib.learn.datasets.load_iris()
50-
features = tf.cast(
51-
tf.reshape(tf.constant(iris.data), [-1, _IRIS_INPUT_DIM]), tf.float32)
52-
target = tf.cast(
53-
tf.reshape(tf.constant(iris.target), [-1]), tf.int32)
48+
features = tf.reshape(tf.constant(iris.data), [-1, _IRIS_INPUT_DIM])
49+
target = tf.reshape(tf.constant(iris.target), [-1])
5450
return features, target
5551

5652

5753
def boston_eval_fn():
5854
boston = tf.contrib.learn.datasets.load_boston()
5955
n_examples = len(boston.target)
60-
features = tf.cast(
61-
tf.reshape(tf.constant(boston.data), [n_examples, _BOSTON_INPUT_DIM]),
62-
tf.float32)
63-
target = tf.cast(
64-
tf.reshape(tf.constant(boston.target), [n_examples, 1]), tf.float32)
56+
features = tf.reshape(
57+
tf.constant(boston.data), [n_examples, _BOSTON_INPUT_DIM])
58+
target = tf.reshape(tf.constant(boston.target), [n_examples, 1])
6559
return tf.concat(0, [features, features]), tf.concat(0, [target, target])
6660

6761

@@ -188,7 +182,7 @@ def testUntrained(self):
188182
with self.assertRaises(tf.contrib.learn.NotFittedError):
189183
_ = est.evaluate(
190184
x=boston.data,
191-
y=boston.target.astype(np.float32))
185+
y=boston.target.astype(np.float64))
192186
with self.assertRaises(tf.contrib.learn.NotFittedError):
193187
est.predict(x=boston.data)
194188

@@ -197,10 +191,11 @@ def testContinueTraining(self):
197191
output_dir = tempfile.mkdtemp()
198192
est = tf.contrib.learn.Estimator(model_fn=linear_model_fn,
199193
model_dir=output_dir)
200-
est.fit(x=boston.data, y=boston.target.astype(np.float32), steps=50)
194+
float64_target = boston.target.astype(np.float64)
195+
est.fit(x=boston.data, y=float64_target, steps=50)
201196
scores = est.evaluate(
202197
x=boston.data,
203-
y=boston.target.astype(np.float32),
198+
y=float64_target,
204199
metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error})
205200
del est
206201
# Create another estimator object with the same output dir.
@@ -210,35 +205,36 @@ def testContinueTraining(self):
210205
# Check we can evaluate and predict.
211206
scores2 = est2.evaluate(
212207
x=boston.data,
213-
y=boston.target.astype(np.float32),
208+
y=float64_target,
214209
metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error})
215210
self.assertAllClose(scores2['MSE'],
216211
scores['MSE'])
217212
predictions = est2.predict(x=boston.data)
218-
other_score = _sklearn.mean_squared_error(predictions, boston.target)
213+
other_score = _sklearn.mean_squared_error(predictions, float64_target)
219214
self.assertAllClose(other_score, scores['MSE'])
220215

221216
# Check we can keep training.
222-
est2.fit(x=boston.data, y=boston.target.astype(np.float32), steps=100)
217+
est2.fit(x=boston.data, y=float64_target, steps=100)
223218
scores3 = est2.evaluate(
224219
x=boston.data,
225-
y=boston.target.astype(np.float32),
220+
y=float64_target,
226221
metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error})
227222
self.assertLess(scores3['MSE'], scores['MSE'])
228223

229224
def testEstimatorParams(self):
230225
boston = tf.contrib.learn.datasets.load_boston()
231226
est = tf.contrib.learn.Estimator(model_fn=linear_model_params_fn,
232227
params={'learning_rate': 0.01})
233-
est.fit(x=boston.data, y=boston.target.astype(np.float32), steps=100)
228+
est.fit(x=boston.data, y=boston.target, steps=100)
234229

235230
def testBostonAll(self):
236231
boston = tf.contrib.learn.datasets.load_boston()
237232
est = tf.contrib.learn.Estimator(model_fn=linear_model_fn)
238-
est.fit(x=boston.data, y=boston.target.astype(np.float32), steps=100)
233+
float64_target = boston.target.astype(np.float64)
234+
est.fit(x=boston.data, y=float64_target, steps=100)
239235
scores = est.evaluate(
240236
x=boston.data,
241-
y=boston.target.astype(np.float32),
237+
y=float64_target,
242238
metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error})
243239
predictions = est.predict(x=boston.data)
244240
other_score = _sklearn.mean_squared_error(predictions, boston.target)
@@ -277,7 +273,7 @@ def testIrisIterator(self):
277273
iris = tf.contrib.learn.datasets.load_iris()
278274
est = tf.contrib.learn.Estimator(model_fn=logistic_model_no_mode_fn)
279275
x_iter = itertools.islice(iris.data, 100)
280-
y_iter = itertools.islice(np.int32(iris.target), 100)
276+
y_iter = itertools.islice(iris.target, 100)
281277
est.fit(x_iter, y_iter, steps=100)
282278
_ = est.evaluate(input_fn=iris_input_fn, steps=1)
283279
predictions = est.predict(x=iris.data)['class']
@@ -374,19 +370,16 @@ def _assert_single_feature_column(
374370
'': tf.FixedLenFeature(shape=expected_shape, dtype=expected_dtype)
375371
}, feature_column.config)
376372

377-
# Note: See tf.contrib.learn.io.data_feeder for why int32 converts to float32.
378373
def testInt32Input(self):
379374
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
380375
np.ones(shape=[7, 8], dtype=np.int32))
381-
self._assert_single_feature_column([8], tf.float32, feature_columns)
376+
self._assert_single_feature_column([8], tf.int32, feature_columns)
382377

383378
def testInt32InputFn(self):
384379
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input_fn(
385380
lambda: (tf.ones(shape=[7, 8], dtype=tf.int32), None))
386381
self._assert_single_feature_column([8], tf.int32, feature_columns)
387382

388-
# Note: See tf.contrib.learn.io.data_feeder for why int64 doesn't convert to
389-
# float64.
390383
def testInt64Input(self):
391384
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
392385
np.ones(shape=[7, 8], dtype=np.int64))
@@ -407,22 +400,21 @@ def testFloat32InputFn(self):
407400
lambda: (tf.ones(shape=[7, 8], dtype=tf.float32), None))
408401
self._assert_single_feature_column([8], tf.float32, feature_columns)
409402

410-
# Note: See tf.contrib.learn.io.data_feeder for why float64 converts to
411-
# float32.
412403
def testFloat64Input(self):
413404
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
414405
np.ones(shape=[7, 8], dtype=np.float64))
415-
self._assert_single_feature_column([8], tf.float32, feature_columns)
406+
self._assert_single_feature_column([8], tf.float64, feature_columns)
416407

417408
def testFloat64InputFn(self):
418409
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input_fn(
419410
lambda: (tf.ones(shape=[7, 8], dtype=tf.float64), None))
420411
self._assert_single_feature_column([8], tf.float64, feature_columns)
421412

422413
def testBoolInput(self):
423-
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
424-
np.array([[False for _ in xrange(8)] for _ in xrange(7)]))
425-
self._assert_single_feature_column([8], tf.float32, feature_columns)
414+
with self.assertRaisesRegexp(
415+
ValueError, 'on integer or non floating types are not supported'):
416+
tf.contrib.learn.infer_real_valued_columns_from_input(
417+
np.array([[False for _ in xrange(8)] for _ in xrange(7)]))
426418

427419
def testBoolInputFn(self):
428420
with self.assertRaisesRegexp(
@@ -431,18 +423,12 @@ def testBoolInputFn(self):
431423
tf.contrib.learn.infer_real_valued_columns_from_input_fn(
432424
lambda: (tf.constant(False, shape=[7, 8], dtype=tf.bool), None))
433425

434-
def testInvalidStringInput(self):
435-
# pylint: disable=g-long-lambda
426+
def testStringInput(self):
436427
with self.assertRaisesRegexp(
437-
ValueError, 'could not convert string to float'):
428+
ValueError, 'on integer or non floating types are not supported'):
429+
# pylint: disable=g-long-lambda
438430
tf.contrib.learn.infer_real_valued_columns_from_input(
439-
np.array([['foo%d' % i for i in xrange(8)] for _ in xrange(7)]))
440-
441-
def testStringInput(self):
442-
# pylint: disable=g-long-lambda
443-
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
444-
np.array([['%d.0' % i for i in xrange(8)] for _ in xrange(7)]))
445-
self._assert_single_feature_column([8], tf.float32, feature_columns)
431+
np.array([['%d.0' % i for i in xrange(8)] for _ in xrange(7)]))
446432

447433
def testStringInputFn(self):
448434
with self.assertRaisesRegexp(
@@ -457,13 +443,13 @@ def testBostonInputFn(self):
457443
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input_fn(
458444
boston_input_fn)
459445
self._assert_single_feature_column(
460-
[_BOSTON_INPUT_DIM], tf.float32, feature_columns)
446+
[_BOSTON_INPUT_DIM], tf.float64, feature_columns)
461447

462448
def testIrisInputFn(self):
463449
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input_fn(
464450
iris_input_fn)
465451
self._assert_single_feature_column(
466-
[_IRIS_INPUT_DIM], tf.float32, feature_columns)
452+
[_IRIS_INPUT_DIM], tf.float64, feature_columns)
467453

468454
if __name__ == '__main__':
469455
tf.test.main()

tensorflow/contrib/learn/python/learn/estimators/random_forest.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,27 +17,43 @@
1717
from __future__ import division
1818
from __future__ import print_function
1919

20-
import time
21-
2220
import numpy as np
2321
import six
2422

2523
from tensorflow.contrib import framework as contrib_framework
2624
from tensorflow.contrib.learn.python.learn import monitors as mon
2725

2826
from tensorflow.contrib.learn.python.learn.estimators import estimator
29-
from tensorflow.contrib.learn.python.learn.estimators import run_config
3027

3128
from tensorflow.contrib.tensor_forest.client import eval_metrics
3229
from tensorflow.contrib.tensor_forest.data import data_ops
3330
from tensorflow.contrib.tensor_forest.python import tensor_forest
3431

32+
from tensorflow.python.framework import dtypes
3533
from tensorflow.python.ops import array_ops
3634
from tensorflow.python.ops import control_flow_ops
3735
from tensorflow.python.ops import math_ops
3836
from tensorflow.python.ops import state_ops
3937

4038

39+
def _assert_float32(tensors):
  """Verify that every supplied tensor has a float32 base dtype.

  Args:
    tensors: A single `Tensor`, or a `dict` whose values are `Tensor` objects.

  Raises:
    TypeError: If the base dtype of any tensor is not float32.
  """
  # Normalize both accepted input forms to one iterable of tensors.
  candidates = tensors.values() if isinstance(tensors, dict) else [tensors]
  for candidate in candidates:
    # Compare the base dtype so reference-typed variants of float32 also pass.
    if candidate.dtype.base_dtype != dtypes.float32:
      raise TypeError('Expected dtype=float32, %s.' % candidate)
55+
56+
4157
class LossMonitor(mon.EveryN):
4258
"""Terminates training when training loss stops decreasing."""
4359

@@ -146,6 +162,8 @@ def _get_train_ops(self, features, targets):
146162
Returns:
147163
Tuple of train `Operation` and loss `Tensor`.
148164
"""
165+
_assert_float32(features)
166+
_assert_float32(targets)
149167
features, spec = data_ops.ParseDataTensorOrDict(features)
150168
labels = data_ops.ParseLabelTensorOrDict(targets)
151169

@@ -168,13 +186,16 @@ def _get_train_ops(self, features, targets):
168186
return train, self.training_loss
169187

170188
def _get_predict_ops(self, features):
189+
_assert_float32(features)
171190
graph_builder = self.graph_builder_class(
172191
self.params, device_assigner=self.device_assigner, training=False,
173192
**self.construction_args)
174193
features, spec = data_ops.ParseDataTensorOrDict(features)
175194
return graph_builder.inference_graph(features, data_spec=spec)
176195

177196
def _get_eval_ops(self, features, targets, metrics):
197+
_assert_float32(features)
198+
_assert_float32(targets)
178199
features, spec = data_ops.ParseDataTensorOrDict(features)
179200
labels = data_ops.ParseLabelTensorOrDict(targets)
180201

0 commit comments

Comments
 (0)