Skip to content

Commit d9031a3

Browse files
committed
fix gpu transform offset when copying poses
1 parent eb3f9d9 commit d9031a3

File tree

6 files changed

+44
-16
lines changed

6 files changed

+44
-16
lines changed

.github/workflows/build.yml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ name: Build
33
on:
44
push:
55
branches: [main, dev]
6+
workflow_dispatch:
67

78
jobs:
89
build-linux-all:
@@ -105,9 +106,18 @@ jobs:
105106
release:
106107
runs-on: ubuntu-latest
107108
needs: [build-linux-all, build-windows-pybind]
108-
permissions:
109-
contents: write
109+
permissions: write-all
110110
steps:
111+
- name: Checkout
112+
uses: actions/checkout@v3
113+
with:
114+
submodules: "false"
115+
fetch-depth: 0
116+
fetch-tags: true
117+
- name: Read branch file
118+
id: getbranch
119+
shell: bash
120+
run: echo branch=$(git describe --tags --exact-match HEAD || echo nightly) >> $GITHUB_OUTPUT
111121
- name: Download wheels
112122
uses: actions/download-artifact@v4
113123
with:
@@ -145,7 +155,7 @@ jobs:
145155
env:
146156
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
147157
with:
148-
tag_name: nightly
158+
tag_name: ${{ steps.getbranch.outputs.branch }}
149159
name: 'Nightly Release'
150160
prerelease: true
151161
body: 'SAPIEN development nightly release. This release is mainly for internal testing. Stable releases are published to pypi https://pypi.org/project/sapien/'

include/sapien/sapien_renderer/batched_render_system.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ class BatchedRenderSystem {
6363

6464
std::vector<std::shared_ptr<BatchedCamera>> mCameraBatches;
6565

66+
// byte stride between consecutive mat4 elements in the transform buffer
67+
int mTransformBufferElementByteOffset{0};
68+
6669
int mShapeCount{0};
6770
CudaArray mCudaSceneTransformRefBuffer;
6871
CudaArray mCudaShapeDataBuffer;

src/sapien_renderer/batched_render_system.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,17 @@ void BatchedRenderSystem::init() {
162162
// cache current versions
163163
mSceneVersions.push_back(system->getScene()->getVersion());
164164

165-
sceneTransformRefs.push_back(system->getTransformCudaArray().ptr);
165+
auto transformArray = system->getTransformCudaArray();
166+
sceneTransformRefs.push_back(transformArray.ptr);
167+
168+
if (mTransformBufferElementByteOffset == 0) {
169+
mTransformBufferElementByteOffset = transformArray.strides.at(0);
170+
if (mTransformBufferElementByteOffset % 4 != 0) {
171+
throw std::runtime_error("corrupted transform array buffer");
172+
}
173+
} else if (mTransformBufferElementByteOffset != transformArray.strides.at(0)) {
174+
throw std::runtime_error("corrupted transform array buffer");
175+
}
166176

167177
for (auto &body : system->getRenderBodyComponents()) {
168178
for (auto &shape : body->getRenderShapes()) {
@@ -284,8 +294,9 @@ void BatchedRenderSystem::update() {
284294

285295
// upload data
286296
update_object_transforms(
287-
(float **)mCudaSceneTransformRefBuffer.ptr, (RenderShapeData *)mCudaShapeDataBuffer.ptr,
288-
(float *)mCudaPoseHandle.ptr, mCudaPoseHandle.shape.at(1), mShapeCount, mCudaStream);
297+
(float **)mCudaSceneTransformRefBuffer.ptr, mTransformBufferElementByteOffset / 4,
298+
(RenderShapeData *)mCudaShapeDataBuffer.ptr, (float *)mCudaPoseHandle.ptr,
299+
mCudaPoseHandle.shape.at(1), mShapeCount, mCudaStream);
289300

290301
update_camera_transforms((CameraData *)mCudaCameraDataBuffer.ptr, (float *)mCudaPoseHandle.ptr,
291302
mCudaPoseHandle.shape.at(1), mCameraCount, mCudaStream);

src/sapien_renderer/batched_render_system.cu

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ inline CUDA_CALLABLE void PoseToMatrix(float *result, Pose const &pose, Vec3 con
3939

4040
__global__ void update_object_transforms_kernel(
4141
float *__restrict__ *__restrict__ scene_transform_buffers, // output buffers
42-
RenderShapeData *__restrict__ shapes,
42+
int transform_stride, RenderShapeData *__restrict__ shapes,
4343
float *__restrict__ poses, // parent pose array
4444
int pose_stride, int count) {
4545
int g = blockIdx.x * blockDim.x + threadIdx.x;
@@ -63,7 +63,7 @@ __global__ void update_object_transforms_kernel(
6363
int scene_index = shape.sceneIndex;
6464
int transform_index = shape.transformIndex;
6565

66-
PoseToMatrix(scene_transform_buffers[scene_index] + transform_index * 16, p, scale);
66+
PoseToMatrix(scene_transform_buffers[scene_index] + transform_index * transform_stride, p, scale);
6767
}
6868

6969
__global__ void update_camera_transforms_kernel(CameraData *cameras, float *poses, int pose_stride,
@@ -94,11 +94,12 @@ __global__ void update_camera_transforms_kernel(CameraData *cameras, float *pose
9494

9595
constexpr int BLOCK_SIZE = 128;
9696

97-
void update_object_transforms(float **scene_transform_buffers, RenderShapeData *render_shapes,
98-
float *poses, int pose_stride, int count, CUstream_st *stream) {
97+
void update_object_transforms(float **scene_transform_buffers, int transform_stride,
98+
RenderShapeData *render_shapes, float *poses, int pose_stride,
99+
int count, CUstream_st *stream) {
99100
update_object_transforms_kernel<<<(count + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE, 0,
100-
(cudaStream_t)stream>>>(scene_transform_buffers, render_shapes,
101-
poses, pose_stride, count);
101+
(cudaStream_t)stream>>>(
102+
scene_transform_buffers, transform_stride, render_shapes, poses, pose_stride, count);
102103
}
103104

104105
void update_camera_transforms(CameraData *cameras, float *poses, int pose_stride, int count,

src/sapien_renderer/batched_render_system.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ struct CameraData {
2828
* count: size of the render_shapes array
2929
* stream: cuda stream
3030
* */
31-
void update_object_transforms(float **scene_transform_buffers, RenderShapeData *render_shapes,
32-
float *poses, int pose_stride, int count, CUstream_st *stream);
31+
void update_object_transforms(float **scene_transform_buffers, int transform_stride,
32+
RenderShapeData *render_shapes, float *poses, int pose_stride,
33+
int count, CUstream_st *stream);
3334

3435
/** The first 32 numbers must be the view matrix and inverse view matrix */
3536
void update_camera_transforms(CameraData *cameras, float *poses, int pose_stride, int count,

src/sapien_renderer/sapien_renderer_system.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,11 @@ void SapienRendererSystem::step() {
170170

171171
CudaArrayHandle SapienRendererSystem::getTransformCudaArray() {
172172
mScene->prepareObjectTransformBuffer();
173+
int offset = mScene->getGpuTransformBufferSize();
174+
173175
auto buffer = mScene->getObjectTransformBuffer();
174-
return CudaArrayHandle{.shape = {static_cast<int>(buffer->getSize() / 64), 4, 4},
175-
.strides = {64, 16, 4},
176+
return CudaArrayHandle{.shape = {static_cast<int>(buffer->getSize() / offset), 4, 4},
177+
.strides = {offset, 16, 4},
176178
.type = "f4",
177179
.cudaId = buffer->getCudaDeviceId(),
178180
.ptr = buffer->getCudaPtr()};

0 commit comments

Comments
 (0)