@@ -25,14 +25,14 @@ namespace caffe2 {
 CAFFE_KNOWN_TYPE(fbgemm::PackBMatrix<int8_t>);
 CAFFE_KNOWN_TYPE(fbgemm::PackedGemmMatrixFP16);
 #endif // USE_FBGEMM
-}
+} // namespace caffe2
 
 namespace at {
 namespace native {
 
 #ifdef USE_FBGEMM
 
-Tensor fbgemm_linear_int8_weight(
+Tensor fbgemm_linear_int8_weight_fp32_activation(
     const Tensor& input,
     const Tensor& weight,
     const Tensor& packed,
@@ -70,13 +70,14 @@ Tensor fbgemm_linear_int8_weight(
   // Input tensor is quantized as 8-bit unsigned values
   static constexpr int precision = 8;
   static constexpr bool is_signed = false;
+  static constexpr int bound = (1 << (precision - 1));
 
   // Calculate scale and zero point for quantization of input tensor
   auto q_params = fbgemm::ChooseQuantizationParams(
       /*min=*/x_min,
       /*max=*/x_max,
-      /*qmin=*/is_signed ? -(1 << (precision - 1)) : 0,
-      /*qmax=*/is_signed ? ((1 << (precision - 1)) - 1) : (1 << precision) - 1,
+      /*qmin=*/is_signed ? -bound : 0,
+      /*qmax=*/is_signed ? (bound - 1) : (1 << precision) - 1,
       /*preserve_sparsity=*/false);
 
   q_params.precision = precision;
@@ -119,7 +120,7 @@ Tensor fbgemm_linear_int8_weight(
   // 1) Add in row and column offsets to the rows and columns, respectively
   // 2) Dequantize the results into floating point
   // 3) Add in the bias term
-  fbgemm::ReQuantizeForFloat<false /*FUSE_RELU*/> outputProcObj(
+  fbgemm::ReQuantizeForFloat</*FUSE_RELU*/false> outputProcObj(
       /*nextop=*/doNothingObj,
       /*Aq_scale=*/q_params.scale,
       /*Bq_scale=*/&weight_scale_float,
@@ -128,10 +129,11 @@ Tensor fbgemm_linear_int8_weight(
       /*row_offsets=*/packA.getRowOffsetBuffer(),
       /*col_offsets=*/col_offsets.data<int32_t>(),
       /*bias=*/bias_contig.data<float>(),
-      /*ncol=*/N);
+      /*nCol=*/N);
 
   // Allocate output Tensor and a buffer for fbgemmPacked to use
-  auto output = at::zeros({M, N}, bias.options().dtype(at::kFloat));
+  auto output = at::zeros(
+      {M, N}, bias.options().dtype(at::kFloat));
   auto buffer = at::zeros_like(output, output.options().dtype(at::kInt));
 
   // Pull out the PackBMatrix instance from the owning tensor
@@ -155,11 +157,33 @@ Tensor fbgemm_linear_int8_weight(
   return output.view(out_sizes);
 }
 
+Tensor fbgemm_linear_int8_weight(
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& packed,
+    const Tensor& col_offsets,
+    Scalar weight_scale,
+    Scalar weight_zero_point,
+    const Tensor& bias) {
+  TORCH_WARN(
+      "fbgemm_linear_int8_weight will be deprecated soon."
+      "Please use fbgemm_linear_int8_weight_fp32_activation instead.");
+
+  return at::native::fbgemm_linear_int8_weight_fp32_activation(
+      input,
+      weight,
+      packed,
+      col_offsets,
+      weight_scale,
+      weight_zero_point,
+      bias);
+}
+
 namespace {
 // Calculate the column offsets
 // Note this includes the sum of the columns as well as the scalar term
-// B_zero_point * K, whereas the row_offsets created by PackAWithQuantRowOffset
-// is only the sum of the A rows.
+// B_zero_point * K, whereas the row_offsets created by
+// PackAWithQuantRowOffset is only the sum of the A rows.
 void calc_col_offsets_transpose(
     int K,
     int N,
@@ -195,11 +219,12 @@ std::tuple<Tensor, Tensor, double, int64_t> fbgemm_linear_quantize_weight(
   // Choose parameters for quantizing the weight as 8-bit signed integer
   static constexpr bool is_signed = true;
   static constexpr int precision = 8;
+  static constexpr int bound = (1 << (precision - 1));
   auto q_params = fbgemm::ChooseQuantizationParams(
       /*min=*/w_min,
       /*max=*/w_max,
-      /*qmin=*/is_signed ? -(1 << (precision - 1)) : 0,
-      /*qmax=*/is_signed ? ((1 << (precision - 1)) - 1) : (1 << precision) - 1,
+      /*qmin=*/is_signed ? -bound : 0,
+      /*qmax=*/is_signed ? (bound - 1) : (1 << precision) - 1,
       /*preserve_sparsity=*/false);
 
   q_params.precision = precision;
@@ -230,14 +255,13 @@ bool fbgemm_is_cpu_supported() {
   return fbgemm::fbgemmSupportedCPU();
 }
 
-Tensor fbgemm_pack_quantized_matrix(
-    const Tensor& weight,
-    int64_t K,
-    int64_t N) {
+Tensor fbgemm_pack_quantized_matrix(const Tensor& weight) {
   // We make a strong guarantee that models using these operators will have the
   // same numerics across different machines. Therefore, we do not provide a
   // fallback path and rather fail loudly if we cannot run FBGEMM.
   TORCH_CHECK(fbgemm::fbgemmSupportedCPU(), "Your CPU doesn't support FBGEMM.");
+  int64_t K = weight.size(1);
+  int64_t N = weight.size(0);
   auto weight_contig = weight.contiguous();
   auto contiguous_ptr = weight_contig.data<int8_t>();
   auto ptr = guts::make_unique<fbgemm::PackBMatrix<int8_t>>(
@@ -251,8 +275,18 @@ Tensor fbgemm_pack_quantized_matrix(
   return cpp_custom_type_hack::create(std::move(ptr), weight.options());
 }
 
-float raw_uint16_to_fp16(unsigned short value)
-{
+Tensor fbgemm_pack_quantized_matrix(
+    const Tensor& weight,
+    int64_t K,
+    int64_t N) {
+  TORCH_WARN(
+      "fbgemm_pack_quantized_matrix(weight, K, N) will be deprecated soon."
+      "Please use fbgemm_pack_quantized_matrix(weight) instead.");
+
+  return at::native::fbgemm_pack_quantized_matrix(weight);
+}
+
+float raw_uint16_to_fp16(unsigned short value) {
   // Convert raw 16 bits half precision floating point number
   // to single precision floating point number.
   unsigned short sign_bits = value >> 15;
@@ -284,7 +318,7 @@ bool check_and_saturate(T* element, T MAX) {
 // number will be saturated to max or min representable values by FP16.
 void handle_weights_saturation(float* weight, int64_t length) {
   float FP16_MAX = raw_uint16_to_fp16(0x7BFF);
-  bool found_out_of_range = false;
+  bool found_out_of_range = false;
 
   for (int i = 0; i < length; ++i) {
     if (check_and_saturate<float>(&weight[i], FP16_MAX)) {
@@ -297,8 +331,7 @@ void handle_weights_saturation(float* weight, int64_t length) {
   }
 }
 
-Tensor fbgemm_pack_gemm_matrix_fp16(
-    const Tensor& weight) {
+Tensor fbgemm_pack_gemm_matrix_fp16(const Tensor& weight) {
   // We make a strong guarantee that models using these operators will have the
   // same numerics across different machines. Therefore, we do not provide a
   // fallback path and rather fail loudly if we cannot run FBGEMM.
@@ -309,7 +342,7 @@ Tensor fbgemm_pack_gemm_matrix_fp16(
   Tensor weight_contig = weight.contiguous();
   auto weight_contig_ptr = weight_contig.data<float>();
 
-  handle_weights_saturation(weight_contig_ptr, K*N);
+  handle_weights_saturation(weight_contig_ptr, K * N);
 
   // TODO(mingzhe09088):
   // Consider using a functor here in PackedGemmMatrixFP16
@@ -319,15 +352,11 @@ Tensor fbgemm_pack_gemm_matrix_fp16(
   // within this translation unit. It might be very problematic if that tensor
   // flows across dll boundaries.
   auto ptr = guts::make_unique<fbgemm::PackedGemmMatrixFP16>(
-      fbgemm::matrix_op_t::Transpose,
-      K,
-      N,
-      1,
-      weight_contig_ptr);
+      fbgemm::matrix_op_t::Transpose, K, N, 1, weight_contig_ptr);
   return cpp_custom_type_hack::create(std::move(ptr), weight.options());
 }
 
-Tensor fbgemm_linear_fp16_weight(
+Tensor fbgemm_linear_fp16_weight_fp32_activation(
     const Tensor& input,
     const Tensor& packed_weight,
     const Tensor& bias) {
@@ -358,7 +387,7 @@ Tensor fbgemm_linear_fp16_weight(
       M,
       input_ptr,
       packed_weight_fp16,
-      0.f,
+      0.0f,
       output.data<float>());
 
   // Add bias term
@@ -369,8 +398,35 @@ Tensor fbgemm_linear_fp16_weight(
   return output.view(out_sizes);
 }
 
+Tensor fbgemm_linear_fp16_weight(
+    const Tensor& input,
+    const Tensor& packed_weight,
+    const Tensor& bias) {
+  TORCH_WARN(
+      "fbgemm_linear_fp16_weight will be deprecated soon."
+      "Please use fbgemm_linear_fp16_weight_fp32_activation instead.");
+
+  return at::native::fbgemm_linear_fp16_weight_fp32_activation(
+      input, packed_weight, bias);
+}
+
 #else // USE_FBGEMM
 
+Tensor fbgemm_linear_int8_weight_fp32_activation(
+    const Tensor& /*input*/,
+    const Tensor& /*weight*/,
+    const Tensor& /*packed*/,
+    const Tensor& /*col_offsets*/,
+    Scalar /*weight_scale*/,
+    Scalar /*weight_zero_point*/,
+    const Tensor& /*bias*/) {
+  // We make a strong guarantee that models using these operators will have the
+  // same numerics across different machines. Therefore, we do not provide a
+  // fallback path and rather fail loudly if we cannot run FBGEMM.
+  TORCH_CHECK(
+      false, "This PyTorch installation was not built with FBGEMM operators");
+}
+
 Tensor fbgemm_linear_int8_weight(
     const Tensor& /*input*/,
     const Tensor& /*weight*/,
@@ -379,6 +435,10 @@ Tensor fbgemm_linear_int8_weight(
     Scalar /*weight_scale*/,
     Scalar /*weight_zero_point*/,
     const Tensor& /*bias*/) {
+  TORCH_WARN(
+      "fbgemm_linear_int8_weight will be deprecated soon."
+      "Please use fbgemm_linear_int8_weight_fp32_activation instead.");
+
   // We make a strong guarantee that models using these operators will have the
   // same numerics across different machines. Therefore, we do not provide a
   // fallback path and rather fail loudly if we cannot run FBGEMM.
@@ -395,19 +455,41 @@ std::tuple<Tensor, Tensor, double, int64_t> fbgemm_linear_quantize_weight(
       false, "This PyTorch installation was not built with FBGEMM operators");
 }
 
+Tensor fbgemm_pack_quantized_matrix(const Tensor& /*input*/) {
+  // We make a strong guarantee that models using these operators will have the
+  // same numerics across different machines. Therefore, we do not provide a
+  // fallback path and rather fail loudly if we cannot run FBGEMM.
+  TORCH_CHECK(
+      false, "This PyTorch installation was not built with FBGEMM operators");
+}
+
 Tensor fbgemm_pack_quantized_matrix(
     const Tensor& /*input*/,
     int64_t /*K*/,
     int64_t /*N*/) {
+  TORCH_WARN(
+      "fbgemm_pack_quantized_matrix(weight, K, N) will be deprecated soon."
+      "Please use fbgemm_pack_quantized_matrix(weight) instead.");
+
   // We make a strong guarantee that models using these operators will have the
   // same numerics across different machines. Therefore, we do not provide a
   // fallback path and rather fail loudly if we cannot run FBGEMM.
   TORCH_CHECK(
       false, "This PyTorch installation was not built with FBGEMM operators");
 }
 
-Tensor fbgemm_pack_gemm_matrix_fp16(
-    const Tensor& weight) {
+Tensor fbgemm_pack_gemm_matrix_fp16(const Tensor& weight) {
+  // We make a strong guarantee that models using these operators will have the
+  // same numerics across different machines. Therefore, we do not provide a
+  // fallback path and rather fail loudly if we cannot run FBGEMM.
+  TORCH_CHECK(
+      false, "This PyTorch installation was not built with FBGEMM operators");
+}
+
+Tensor fbgemm_linear_fp16_weight_fp32_activation(
+    const Tensor& input,
+    const Tensor& packed_weight,
+    const Tensor& bias) {
   // We make a strong guarantee that models using these operators will have the
   // same numerics across different machines. Therefore, we do not provide a
   // fallback path and rather fail loudly if we cannot run FBGEMM.
@@ -419,6 +501,10 @@ Tensor fbgemm_linear_fp16_weight(
     const Tensor& input,
     const Tensor& packed_weight,
     const Tensor& bias) {
+  TORCH_WARN(
+      "fbgemm_linear_fp16_weight will be deprecated soon."
+      "Please use fbgemm_linear_fp16_weight_fp32_activation instead.");
+
   // We make a strong guarantee that models using these operators will have the
   // same numerics across different machines. Therefore, we do not provide a
   // fallback path and rather fail loudly if we cannot run FBGEMM.
@@ -431,5 +517,5 @@ bool fbgemm_is_cpu_supported() {
 }
 
 #endif // USE_FBGEMM
-}
+} // namespace native
 } // namespace at