File tree
4 files changed
+6
-3
lines changed- test
- third_party
4 files changed
+6
-3
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1 | 1 | | |
| 2 | + | |
| 3 | + | |
2 | 4 | | |
3 | 5 | | |
4 | 6 | | |
| |||
8 | 10 | | |
9 | 11 | | |
10 | 12 | | |
| 13 | + | |
11 | 14 | | |
12 | 15 | | |
13 | 16 | | |
| |||
Submodule cudnn_frontend updated 21 files
- CMakeLists.txt+1-5
- Doxyfile+2-2
- README.md-7
- docs/html/dynsections.js-7
- docs/html/graph_legend.html+7-32
- docs/html/index.html+8-33
- docs/html/navtree.css-146
- docs/html/navtree.js-517
- docs/html/navtreedata.js-12
- docs/html/navtreeindex0.js-5
- docs/html/resize.js-114
- docs/xml/d8/dcc/namespacestd.xml+32-1
- docs/xml/index.xml+2.5k
- include/cudnn_frontend.h+2-2
- samples/CMakeLists.txt+1-2
- samples/fusion_sample.cpp+12-13
- samples/fusion_sample.h+1-2
- samples/helpers.h+1-1
- samples/norm_samples.cpp-457
- samples/norm_samples.h-138
- samples/test_list.cpp+11-106
- CHANGELOG.md+1-7
- CUDA.cmake-2
- PUBLICATIONS.md+1-1
- README.md+1-8
- examples/13_two_tensor_op_fusion/threadblock/b2b_mma_multistage.h+1-9
- examples/13_two_tensor_op_fusion/threadblock/b2b_mma_multistage_smem_accumulator.h+1-9
- examples/45_dual_gemm/threadblock/dual_mma_multistage.h+7-4
- examples/47_ampere_gemm_universal_streamk/CMakeLists.txt-4
- examples/47_ampere_gemm_universal_streamk/ampere_gemm_universal_streamk.cu+3-3
- examples/47_ampere_gemm_universal_streamk/ampere_gemm_universal_streamk_broadcast.cu-653
- include/cute/algorithm/tuple_algorithms.hpp+1-1
- include/cute/arch/util.hpp+15-19
- include/cute/atom/mma_atom.hpp+3-3
- include/cute/config.hpp+1-1
- include/cute/container/cuda_types.hpp+3-15
- include/cutlass/epilogue/collective/builders/sm90_builder.inl+50-37
- include/cutlass/epilogue/collective/default_epilogue.hpp-3
- include/cutlass/epilogue/collective/sm90_epilogue_tma_warpspecialized.hpp+36-60
- include/cutlass/epilogue/collective/sm90_epilogue_tma_warpspecialized_bias_elementwise.hpp+93-214
- include/cutlass/epilogue/thread/activation.h+3-3
- include/cutlass/epilogue/thread/linear_combination_bias_elementwise.h+1-1
- include/cutlass/epilogue/threadblock/default_epilogue_tensor_op_row_broadcast.h-183
- include/cutlass/epilogue/threadblock/default_epilogue_with_broadcast.h-62
- include/cutlass/epilogue/threadblock/epilogue_streamk_with_broadcast.h-443
- include/cutlass/epilogue/threadblock/epilogue_with_broadcast.h-178
- include/cutlass/epilogue/threadblock/predicated_tile_iterator_row_broadcast.h-519
- include/cutlass/gemm/device/gemm_sparse_row_broadcast.h-514
- include/cutlass/gemm/device/gemm_universal_adapter.h+21-21
- include/cutlass/gemm/device/gemm_universal_streamk_with_broadcast.h-386
- include/cutlass/gemm/device/gemv.h+9-22
- include/cutlass/gemm/device/gemv_strided_batched.h+167
- include/cutlass/gemm/gemm.h+7-19
- include/cutlass/gemm/kernel/default_gemm_sparse_row_broadcast.h-191
- include/cutlass/gemm/kernel/default_gemm_streamk_with_broadcast.h-146
- include/cutlass/gemm/kernel/gemm_streamk_with_fused_epilogue.h-2.4k
- include/cutlass/gemm/kernel/gemv.h+26-375
- include/cutlass/gemm/kernel/gemv_strided_batched.h+368
- include/cutlass/gemm/kernel/sm70_gemm.hpp+8-4
- include/cutlass/gemm/kernel/sm90_gemm_tma.hpp+11-8
- include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized.hpp+9-7
- include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_cooperative.hpp+9-7
- include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp+14-12
- include/cutlass/gemm/kernel/sm90_tile_scheduler.hpp+3-3
- include/cutlass/gemm/kernel/sparse_gemm_row_broadcast.h-400
- include/cutlass/gemm/threadblock/ell_mma_multistage.h-6
- include/cutlass/gemm/threadblock/mma_blas3_multistage.h-5
- include/cutlass/gemm/threadblock/mma_layernorm_mainloop_fusion_multistage.h+6-4
- include/cutlass/gemm/threadblock/mma_multistage.h+6-4
- include/cutlass/gemm/threadblock/mma_planar_complex_multistage.h-6
- include/cutlass/gemm/threadblock/mma_softmax_mainloop_fusion_multistage.h-5
- include/cutlass/gemm/threadblock/mma_sparse_multistage.h-6
- include/cutlass/gemm/threadblock/mma_with_reduction_multistage.h+6-4
- include/cutlass/numeric_conversion.h+83-99
- include/cutlass/transform/threadblock/predicated_tile_access_iterator.h+2-2
- media/docs/cutlass_3x_backwards_compatibility.md+2-2
- media/docs/gemm_api.md+1-1
- media/docs/layout.md+1-1
- media/docs/profiler.md+4-16
- test/unit/gemm/device/CMakeLists.txt+11
- test/unit/gemm/device/gemm_f16n_f16n_f16t_tensor_op_f32_sparse_sm80.cu-19
- test/unit/gemm/device/gemv.cu+44-187
- test/unit/gemm/device/gemv_strided_batched.cu+490
- test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_cluster_warpspecialized_cooperative.cu+2-76
- test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_cluster_warpspecialized_cooperative_bias_elementwise.cu-44
- test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_cluster_warpspecialized_pingpong.cu+16-102
- test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_cluster_warpspecialized_pingpong_bias_elementwise.cu-44
- test/unit/gemm/device/sm90_gemm_s8_s8_s8_tensor_op_s32.cu-144
- test/unit/gemm/device/testbed_sparse.h+7-21
- tools/library/scripts/generator.py+20-56
- tools/library/src/reference/gemm.cu+352
- tools/profiler/src/cublas_helpers.cu+2-15
- .github/ISSUE_TEMPLATE/bug-report.yml+3-19
- .github/workflows/ci.yml+13-153
- .github/workflows/configure.yml+2-6
- .github/workflows/format.yml-2
- .github/workflows/labeler.yml+1-5
- .github/workflows/pip.yml+4-4
- .github/workflows/upstream.yml-2
- .gitignore-1
- .pre-commit-config.yaml+17-17
- CMakeLists.txt-2
- MANIFEST.in-1
- docs/advanced/exceptions.rst+3-6
- docs/advanced/misc.rst-37
- docs/changelog.rst+1-123
- docs/conf.py+1
- include/pybind11/attr.h+1-1
- include/pybind11/cast.h+5-9
- include/pybind11/detail/class.h+1-9
- include/pybind11/detail/common.h+34-82
- include/pybind11/detail/init.h+15-21
- include/pybind11/detail/internals.h+3-31
- include/pybind11/detail/type_caster_base.h-9
- include/pybind11/eigen.h+702-1
- include/pybind11/eigen/matrix.h-701
- include/pybind11/eigen/tensor.h-511
- include/pybind11/embed.h+42-80
- include/pybind11/functional.h+2-9
- include/pybind11/gil.h+4-4
- include/pybind11/numpy.h+12-13
- include/pybind11/options.h-16
- include/pybind11/pybind11.h+57-81
- include/pybind11/pytypes.h+22-42
- include/pybind11/stl_bind.h+47-107
- pybind11/__main__.py+1-7
- pybind11/_version.py+1-1
- pybind11/setup_helpers.py+4-3
- setup.cfg+2-2
- tests/CMakeLists.txt+4-29
- tests/conftest.py+1-25
- tests/eigen_tensor_avoid_stl_array.cpp-14
- tests/extra_python_package/test_files.py+3-7
- tests/test_builtin_casters.cpp+2-7
- tests/test_callbacks.cpp-37
- tests/test_callbacks.py-13
- tests/test_chrono.py+4
- tests/test_class.cpp+3-3
- tests/test_class.py+2-10
- tests/test_constants_and_functions.cpp+21-13
- tests/test_custom_type_casters.py+1-2
- tests/test_docstring_options.cpp-53
- tests/test_docstring_options.py-23
- tests/test_eigen.cpp+5-3
- tests/test_eigen.py+1-1
- tests/test_eigen_tensor.cpp-18
- tests/test_eigen_tensor.inl-333
- tests/test_eigen_tensor.py-290
- tests/test_embed/catch.cpp+3-1
- tests/test_embed/test_interpreter.cpp+3-70
- tests/test_exceptions.py+2-3
- tests/test_gil_scoped.py+2-2
- tests/test_kwargs_and_defaults.cpp+5-4
- tests/test_local_bindings.py+1-2
- tests/test_modules.py+5-4
- tests/test_numpy_array.cpp-2
- tests/test_numpy_array.py-6
- tests/test_operator_overloading.cpp+15-8
- tests/test_operator_overloading.py+1
- tests/test_pytypes.cpp-2
- tests/test_pytypes.py-6
- tests/test_stl_binders.py-26
- tests/test_virtual_functions.cpp+1-2
- tools/FindPythonLibsNew.cmake+2-4
- tools/make_changelog.py+5-4
- tools/pybind11Common.cmake-10
- tools/pybind11NewTools.cmake+2-2
- tools/pybind11Tools.cmake+1-1
- tools/setup_global.py.in+1-3
- tools/setup_main.py.in-2
0 commit comments