I have an array of pointers to an abstract class A. These pointers point to objects of class B,C and they all derive from A. The sizes of these derived classes are not the same, so they have a function called size that returns a size_t of their size.
MCVE:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <vector>
class A {
public:
float a;
A() {}
__host__ __device__ virtual size_t size() = 0;
};
class B : public A {
public:
float b;
B() {}
__host__ __device__ virtual size_t size() override { return sizeof(*this); }
};
class C : public A {
public:
float c, d;
C() {}
__host__ __device__ virtual size_t size() override { return sizeof(*this); }
};
__global__ void testKernel(A** objects, int numObjects) {
for (int i = 0; i < numObjects; i++) {
printf("%d\n", objects[i]->size());
}
}
int main()
{
std::vector<A*> host_pointers;
host_pointers.push_back(new B());
host_pointers.push_back(new C());
cudaError_t cudaStatus;
std::vector<A*> device_pointers;
for (auto obj : host_pointers) {
A* device_pointer;
cudaStatus = cudaMalloc((void**)&device_pointer, obj->size());
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed for size %d\n", obj->size());
exit(-1);
}
cudaStatus = cudaMemcpy(device_pointer, obj, obj->size(), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed for size %d\n", obj->size());
exit(-1);
}
device_pointers.push_back(device_pointer);
}
///By this point, both objects should have been copied over
///to device memory, and I should have valid pointers to them
A** array_of_device_pointers;
cudaStatus = cudaMalloc((void**)&array_of_device_pointers, device_pointers.size() * sizeof(A*));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed\n");
exit(-1);
}
cudaStatus = cudaMemcpy(array_of_device_pointers, device_pointers.data(), device_pointers.size() * sizeof(A*), cudaMemcpyHostToDevice);
testKernel<<<1, 1>>>(array_of_device_pointers, device_pointers.size());
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "kernel failed, reason: %s\n", cudaGetErrorString(cudaStatus));
}
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSynchronize failed\n");
exit(-1);
}
}
What I want to do: have objects derived from A in a kernel
How I'm trying to do it: copy the objects to device memory one by one (since their sizes aren't the same), then copy an array of their device pointers to device memory, and then pass a device pointer to that onto a kernel
What I experience: When I run the program with the Nsight debugger, it stops at the line
printf("%d\n", objects[i]->size());, (I'm guessing) meaning that object[0] is not a valid pointer.
Not sure if it matters with something this simple, but I'm running this on a GPU with compute capability 8.6, though compiling for compute capability 5.2