As other answers point out, the use of malloc (or even new) should be avoided in c++. Anyway, as you requested:
I require a logic for using malloc in this situation instead of new / delete because I have to do this in cuda...
In this case you have to allocate memory for the Hessian instances first, then iterate throug them and allocate memory for each Dxx, Dxy and Dyy. I would create a function for this like follows:
Hessian* create(size_t length) {
Hessian* obj = (Hessian*)malloc(length * sizeof(Hessian));
for(size_t i = 0; i < length; ++i) {
obj[i].Dxx = (float*)malloc(sizeof(float));
obj[i].Dxy = (float*)malloc(sizeof(float));
obj[i].Dyy = (float*)malloc(sizeof(float));
}
return obj;
}
To deallocate the memory you allocated with create function above, you have to iterate through Hessian instances and deallocate each Dxx, Dxy and Dyy first, then deallocate the block which stores the Hessian instances:
void destroy(Hessian* obj, size_t length) {
for(size_t i = 0; i < length; ++i) {
free(obj[i].Dxx);
free(obj[i].Dxy);
free(obj[i].Dyy);
}
free(obj);
}
Note: using the presented method will pass the responsibility of preventing memory leaks to you.
If you wish to use the std::vector instead of manual allocation and deallocation (which is highly recommended), you can write a custom allocator for it to use cudaMalloc and cudaFree like follows:
template<typename T> struct cuda_allocator {
using value_type = T;
cuda_allocator() = default;
template<typename U> cuda_allocator(const cuda_allocator<U>&) {
}
T* allocate(std::size_t count) {
if(count <= max_size()) {
void* raw_ptr = nullptr;
if(cudaMalloc(&raw_ptr, count * sizeof(T)) == cudaSuccess)
return static_cast<T*>(raw_ptr);
}
throw std::bad_alloc();
}
void deallocate(T* raw_ptr, std::size_t) {
cudaFree(raw_ptr);
}
static std::size_t max_size() {
return std::numeric_limits<std::size_t>::max() / sizeof(T);
}
};
template<typename T, typename U>
inline bool operator==(const cuda_allocator<T>&, const cuda_allocator<U>&) {
return true;
}
template<typename T, typename U>
inline bool operator!=(const cuda_allocator<T>& a, const cuda_allocator<U>& b) {
return !(a == b);
}
The usage of an custom allocator is very simple, you just have to specify it as second template parameter of std::vector:
struct Hessian {
std::vector<float, cuda_allocator<float>> Dxx;
std::vector<float, cuda_allocator<float>> Dxy;
std::vector<float, cuda_allocator<float>> Dyy;
};
/* ... */
std::vector<Hessian, cuda_allocator<Hessian>> hessian;