@@ -3856,16 +3856,205 @@ LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs)
 
 #if defined(TH_REAL_IS_BYTE)
 
-#define TENSOR_IMPLEMENT_LOGICAL_SUM(NAME, OP, INIT_VALUE) \
-  int THTensor_(NAME)(THTensor *tensor) \
-  { \
-    int sum = INIT_VALUE; \
-    TH_TENSOR_APPLY(real, tensor, sum = sum OP *tensor_data;); \
-    return sum; \
+int THTensor_(logicalAndAll)(THTensor *tensor)
+{
+  real prod = 1;
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    TH_TENSOR_APPLY_REDUCTION_OMP(real, tensor, &&:prod, prod = prod && *tensor_data;);
+  }
+#else
+  serial_path = 1;
+#endif
+  if (serial_path) {
+    TH_TENSOR_APPLY(real, tensor, prod = prod && *tensor_data;);
+  }
+  return prod;
+}
+
+int THTensor_(logicalAnyAll)(THTensor *tensor)
+{
+  real sum = 0;
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    TH_TENSOR_APPLY_REDUCTION_OMP(real, tensor, ||:sum, sum = sum || *tensor_data;);
+  }
+#else
+  serial_path = 1;
+#endif
+  if (serial_path) {
+    TH_TENSOR_APPLY(real, tensor, sum = sum || *tensor_data;);
+  }
+  return (bool)sum;
+}
+
+void THTensor_(logicalAnd)(THTensor *r_, THTensor *t, int dimension, int keepdim)
+{
+  THLongStorage *dim;
+
+  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
+      dimension + TH_INDEX_BASE);
+
+  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimension)(t), dimension, keepdim);
+  dim = THTensor_(newSizeOf)(t);
+  THLongStorage_set(dim, dimension, 1);
+  THTensor_(resize)(r_, dim, NULL);
+  THLongStorage_free(dim);
+
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    int r_Contig = THTensor_(isContiguous)(r_);
+    real *tp = THTensor_(data)(t);
+    real *rp = THTensor_(data)(r_);
+    if (r_Contig && (tp != rp)) {
+      ptrdiff_t iter = 0;
+      ptrdiff_t r_Size = THTensor_(nElement)(r_);
+      int r_Dim = r_->nDimension;
+      #pragma omp parallel for if (r_Size > TH_OMP_OVERHEAD_THRESHOLD)
+      for (iter = 0; iter < r_Size; iter++) {
+        int j;
+        int64_t quot;
+        int64_t rem = iter;
+        ptrdiff_t tBasicIndex = 0;
+
+        for (j = 0; j < r_Dim; ++j) {
+          if (j != dimension) {
+            quot = rem/r_->stride[j];
+            rem = rem%r_->stride[j];
+            tBasicIndex += quot*t->stride[j];
+          }
+        }
+        real *t_data = tp + tBasicIndex;
+        real *r__data = rp + iter;
+        *r__data = 1;
+        for (j = 0; j < t->size[dimension]; ++j) {
+          *r__data = *r__data && *(t_data + j*t->stride[dimension]);
+        }
+      }
+    } else {
+      serial_path = 1;
+    }
+  }
+#else
+  serial_path = 1;
+#endif
+
+  if (serial_path) {
+    // two implementations optimized for data locality
+    if (t->stride[dimension] == 1) {
+      TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
+                           accreal prod = 1;
+                           int64_t i;
+                           for (i = 0; i < t_size; i++)
+                             prod = prod && t_data[i*t_stride];
+                           *r__data = (real)prod;);
+    } else {
+      THTensor_(fill)(r_, 1);
+      THTensor *temp_ = THTensor_(newWithTensor)(r_);
+      // r_.expand_as(t)
+      temp_->size[dimension] = t->size[dimension];
+      temp_->stride[dimension] = 0;
+
+      TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data && *t_data;);
+      THTensor_(free)(temp_);
+    }
+  }
+  if (!keepdim) {
+    THTensor_(squeeze1d)(r_, r_, dimension);
+  }
+}
+
+void THTensor_(logicalAny)(THTensor *r_, THTensor *t, int dimension, int keepdim)
+{
+  THLongStorage *dim;
+
+  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
+      dimension + TH_INDEX_BASE);
+
+  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimension)(t), dimension, keepdim);
+  dim = THTensor_(newSizeOf)(t);
+  THLongStorage_set(dim, dimension, 1);
+  THTensor_(resize)(r_, dim, NULL);
+  THLongStorage_free(dim);
+
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    int r_Contig = THTensor_(isContiguous)(r_);
+    real *tp = THTensor_(data)(t);
+    real *rp = THTensor_(data)(r_);
+    if (r_Contig && (tp != rp)) {
+      ptrdiff_t iter = 0;
+      ptrdiff_t r_Size = THTensor_(nElement)(r_);
+      int r_Dim = r_->nDimension;
+      #pragma omp parallel for if (r_Size > TH_OMP_OVERHEAD_THRESHOLD)
+      for (iter = 0; iter < r_Size; iter++) {
+        int j;
+        int64_t quot;
+        int64_t rem = iter;
+        ptrdiff_t tBasicIndex = 0;
+
+        for (j = 0; j < r_Dim; ++j) {
+          if (j != dimension) {
+            quot = rem/r_->stride[j];
+            rem = rem%r_->stride[j];
+            tBasicIndex += quot*t->stride[j];
+          }
+        }
+        real *t_data = tp + tBasicIndex;
+        real *r__data = rp + iter;
+        *r__data = 0;
+        for (j = 0; j < t->size[dimension]; ++j) {
+          *r__data = *r__data || *(t_data + j*t->stride[dimension]);
+        }
+      }
+    } else {
+      serial_path = 1;
+    }
   }
+#else
+  serial_path = 1;
+#endif
+  if (serial_path) {
+    // two implementations optimized for data locality
+    if (t->stride[dimension] == 1) {
+      TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
+                           accreal sum = 0;
+                           int64_t i;
+                           for (i = 0; i < t_size; i++)
+                             sum = sum || t_data[i*t_stride];
+                           *r__data = (real)sum;);
+    } else {
+      THTensor_(zero)(r_);
+      THTensor *temp_ = THTensor_(newWithTensor)(r_);
+      // r_.expand_as(t)
+      temp_->size[dimension] = t->size[dimension];
+      temp_->stride[dimension] = 0;
 
-TENSOR_IMPLEMENT_LOGICAL_SUM(logicalall, &&, 1)
-TENSOR_IMPLEMENT_LOGICAL_SUM(logicalany, ||, 0)
+      TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data || *t_data;);
+      THTensor_(free)(temp_);
+    }
+  }
+
+  if (!keepdim) {
+    THTensor_(squeeze1d)(r_, r_, dimension);
+  }
+}
 
 #endif /* Byte only part */
 
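For reference, here is a minimal sketch of how the byte-tensor reductions introduced above could be exercised from plain C. It assumes the usual TH generic-name expansion (under TH_REAL_IS_BYTE, THTensor_(logicalAndAll) is generated as THByteTensor_logicalAndAll, and so on) and that the TH umbrella header is reachable as <TH/TH.h>; the tensor shape and values are illustrative only and are not part of this commit.

#include <TH/TH.h>   /* umbrella header; the include path may differ per build setup */
#include <stdio.h>

int main(void)
{
  /* 2x3 byte tensor: fill with ones, then clear one element in row 0 */
  THByteTensor *t = THByteTensor_newWithSize2d(2, 3);
  THByteTensor_fill(t, 1);
  THByteTensor_set2d(t, 0, 1, 0);

  /* full reductions added in this diff (generated names for the byte type) */
  printf("and-all: %d\n", THByteTensor_logicalAndAll(t)); /* 0: one element is zero */
  printf("any-all: %d\n", THByteTensor_logicalAnyAll(t)); /* 1: some element is nonzero */

  /* dimension-wise reduction along dim 1 with keepdim = 0; r ends up with size {2} */
  THByteTensor *r = THByteTensor_new();
  THByteTensor_logicalAnd(r, t, 1, 0);
  printf("row 0 all nonzero: %d\n", (int)THByteTensor_get1d(r, 0)); /* 0 */
  printf("row 1 all nonzero: %d\n", (int)THByteTensor_get1d(r, 1)); /* 1 */

  THByteTensor_free(r);
  THByteTensor_free(t);
  return 0;
}

Note the design choice visible throughout the diff: both the *All reductions and the dimension-wise kernels check omp_in_parallel() and fall back to the serial TH_TENSOR_APPLY path when the caller is already inside an OpenMP region, avoiding nested parallelism.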