34 changes: 17 additions & 17 deletions paddle/phi/kernels/funcs/detection/bbox_util.cu.h
@@ -46,23 +46,23 @@ struct RangeInitFunctor {
};

template <typename T>
-static void SortDescending(const phi::GPUContext &ctx,
+static void SortDescending(const phi::GPUContext &dev_ctx,
const phi::DenseTensor &value,
phi::DenseTensor *value_out,
phi::DenseTensor *index_out) {
int num = static_cast<int>(value.numel());
phi::DenseTensor index_in_t;
index_in_t.Resize({num});
-int *idx_in = ctx.Alloc<int>(&index_in_t);
-ForRange<phi::GPUContext> for_range(ctx, num);
+int *idx_in = dev_ctx.Alloc<int>(&index_in_t);
+ForRange<phi::GPUContext> for_range(dev_ctx, num);
for_range(RangeInitFunctor{0, 1, idx_in});

index_out->Resize({num});
-int *idx_out = ctx.Alloc<int>(index_out);
+int *idx_out = dev_ctx.Alloc<int>(index_out);

const T *keys_in = value.data<T>();
value_out->Resize({num});
-T *keys_out = ctx.Alloc<T>(value_out);
+T *keys_out = dev_ctx.Alloc<T>(value_out);

// Determine temporary device storage requirements
size_t temp_storage_bytes = 0;
@@ -75,9 +75,9 @@ static void SortDescending(const phi::GPUContext &ctx,
num,
0,
sizeof(T) * 8,
-ctx.stream());
+dev_ctx.stream());
// Allocate temporary storage
-auto place = ctx.GetPlace();
+auto place = dev_ctx.GetPlace();
auto d_temp_storage = phi::memory_utils::Alloc(place, temp_storage_bytes);

// Run sorting operation
@@ -90,7 +90,7 @@ static void SortDescending(const phi::GPUContext &ctx,
num,
0,
sizeof(T) * 8,
-ctx.stream());
+dev_ctx.stream());
}

template <typename T>
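Aside on the call pattern this hunk touches: SortDescending uses cub's two-phase idiom, where the first SortPairsDescending call (made with a null temp-storage pointer) only reports the scratch size and the second call performs the sort on the given stream. A minimal self-contained sketch of that idiom, with hypothetical buffer names rather than Paddle's tensors:

#include <cub/cub.cuh>

// Sketch only: d_keys_in/d_keys_out and d_vals_in/d_vals_out are assumed to be
// device buffers of length num allocated by the caller.
inline void SortPairsDescendingSketch(const float *d_keys_in, float *d_keys_out,
                                      const int *d_vals_in, int *d_vals_out,
                                      int num, cudaStream_t stream) {
  void *d_temp_storage = nullptr;
  size_t temp_storage_bytes = 0;
  // First call: with d_temp_storage == nullptr, cub only writes the required
  // scratch size into temp_storage_bytes and returns without sorting.
  cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes,
                                            d_keys_in, d_keys_out,
                                            d_vals_in, d_vals_out,
                                            num, 0, sizeof(float) * 8, stream);
  cudaMalloc(&d_temp_storage, temp_storage_bytes);
  // Second call: same arguments plus real scratch space; this one sorts.
  cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes,
                                            d_keys_in, d_keys_out,
                                            d_vals_in, d_vals_out,
                                            num, 0, sizeof(float) * 8, stream);
  cudaFree(d_temp_storage);
}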
@@ -289,7 +289,7 @@ static __global__ void NMSKernel(const int n_boxes,
}

template <typename T>
-static void NMS(const phi::GPUContext &ctx,
+static void NMS(const phi::GPUContext &dev_ctx,
const phi::DenseTensor &proposals,
const phi::DenseTensor &sorted_indices,
const T nms_threshold,
@@ -302,14 +302,14 @@ static void NMS(const phi::GPUContext &ctx,
dim3 threads(kThreadsPerBlock);

const T *boxes = proposals.data<T>();
-auto place = ctx.GetPlace();
+auto place = dev_ctx.GetPlace();
auto mask_ptr = phi::memory_utils::Alloc(
-ctx.GetPlace(),
+dev_ctx.GetPlace(),
boxes_num * col_blocks * sizeof(uint64_t),
-phi::Stream(reinterpret_cast<phi::StreamId>(ctx.stream())));
+phi::Stream(reinterpret_cast<phi::StreamId>(dev_ctx.stream())));
uint64_t *mask_dev = reinterpret_cast<uint64_t *>(mask_ptr->ptr());

-NMSKernel<<<blocks, threads, 0, ctx.stream()>>>(
+NMSKernel<<<blocks, threads, 0, dev_ctx.stream()>>>(
boxes_num, nms_threshold, boxes, mask_dev, pixel_offset);

std::vector<uint64_t> remv(col_blocks);
@@ -321,7 +321,7 @@ static void NMS(const phi::GPUContext &ctx,
place,
mask_dev,
boxes_num * col_blocks * sizeof(uint64_t),
-ctx.stream());
+dev_ctx.stream());

std::vector<int> keep_vec;
int num_to_keep = 0;
@@ -339,14 +339,14 @@ static void NMS(const phi::GPUContext &ctx,
}
}
keep_out->Resize({num_to_keep});
-int *keep = ctx.Alloc<int>(keep_out);
+int *keep = dev_ctx.Alloc<int>(keep_out);
phi::memory_utils::Copy(place,
keep,
phi::CPUPlace(),
keep_vec.data(),
sizeof(int) * num_to_keep,
-ctx.stream());
-ctx.Wait();
+dev_ctx.stream());
+dev_ctx.Wait();
}

} // namespace funcs
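A note on the buffer being copied back above: for each box, mask_dev holds col_blocks 64-bit words whose set bits mark the lower-scored boxes it suppresses (IoU above nms_threshold). The host loop in NMS reduces that mask greedily. A rough host-side sketch of that reduction, using illustrative names rather than the variables in this file:

#include <cstdint>
#include <vector>

// Sketch of the greedy bitmask reduction behind GPU NMS: walk boxes in
// descending score order and keep a box only if no previously kept box has
// already marked it as suppressed.
std::vector<int> ReduceMaskSketch(const std::vector<uint64_t> &mask_host,
                                  int boxes_num) {
  const int col_blocks = (boxes_num + 63) / 64;
  std::vector<uint64_t> remv(col_blocks, 0);  // accumulated suppression bits
  std::vector<int> keep;
  for (int i = 0; i < boxes_num; ++i) {
    const int block = i / 64;
    const int bit = i % 64;
    if (!(remv[block] & (1ULL << bit))) {  // box i not suppressed so far
      keep.push_back(i);
      // OR box i's row of the mask into the running suppression set.
      for (int j = block; j < col_blocks; ++j) {
        remv[j] |= mask_host[i * col_blocks + j];
      }
    }
  }
  return keep;
}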
12 changes: 6 additions & 6 deletions paddle/phi/kernels/funcs/detection/bbox_util.h
@@ -161,13 +161,13 @@ static void AppendProposals(phi::DenseTensor* dst,
}

template <class T>
-void ClipTiledBoxes(const phi::DeviceContext& ctx,
+void ClipTiledBoxes(const phi::DeviceContext& dev_ctx,
const phi::DenseTensor& im_info,
const phi::DenseTensor& input_boxes,
phi::DenseTensor* out,
bool is_scale = true,
bool pixel_offset = true) {
-T* out_data = ctx.Alloc<T>(out);
+T* out_data = dev_ctx.Alloc<T>(out);
const T* im_info_data = im_info.data<T>();
const T* input_boxes_data = input_boxes.data<T>();
T offset = pixel_offset ? static_cast<T>(1.0) : 0;
@@ -195,7 +195,7 @@ void ClipTiledBoxes(const phi::DeviceContext& ctx,

// Filter the box with small area
template <class T>
-void FilterBoxes(const phi::DeviceContext& ctx,
+void FilterBoxes(const phi::DeviceContext& dev_ctx,
const phi::DenseTensor* boxes,
float min_size,
const phi::DenseTensor& im_info,
@@ -206,7 +206,7 @@ void FilterBoxes(const phi::DeviceContext& ctx,
const T* boxes_data = boxes->data<T>();
keep->Resize({boxes->dims()[0]});
min_size = std::max(min_size, 1.0f);
-int* keep_data = ctx.Alloc<int>(keep);
+int* keep_data = dev_ctx.Alloc<int>(keep);
T offset = pixel_offset ? static_cast<T>(1.0) : 0;

int keep_len = 0;
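As a reminder of what FilterBoxes computes (this PR only renames its context argument): a box index is kept only when both its width and height, in [x1, y1, x2, y2] layout plus the optional one-pixel offset, reach min_size. A CPU-side sketch of that criterion, ignoring the im_info scaling handled by the real helper:

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch only: boxes is a flat [n, 4] array; returns the indices that survive
// the small-box filter.
std::vector<int> FilterSmallBoxesSketch(const std::vector<float> &boxes,
                                        float min_size, bool pixel_offset) {
  const float offset = pixel_offset ? 1.0f : 0.0f;
  min_size = std::max(min_size, 1.0f);
  std::vector<int> keep;
  for (std::size_t i = 0; i * 4 + 3 < boxes.size(); ++i) {
    const float w = boxes[i * 4 + 2] - boxes[i * 4 + 0] + offset;
    const float h = boxes[i * 4 + 3] - boxes[i * 4 + 1] + offset;
    if (w >= min_size && h >= min_size) keep.push_back(static_cast<int>(i));
  }
  return keep;
}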
@@ -236,13 +236,13 @@
}

template <class T>
-static void BoxCoder(const phi::DeviceContext& ctx,
+static void BoxCoder(const phi::DeviceContext& dev_ctx,
phi::DenseTensor* all_anchors,
phi::DenseTensor* bbox_deltas,
phi::DenseTensor* variances,
phi::DenseTensor* proposals,
const bool pixel_offset = true) {
-T* proposals_data = ctx.Alloc<T>(proposals);
+T* proposals_data = dev_ctx.Alloc<T>(proposals);

int64_t row = all_anchors->dims()[0];
int64_t len = all_anchors->dims()[1];
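BoxCoder, likewise only touched by the parameter rename, decodes regression deltas against anchors. Assuming the usual (dx, dy, dw, dh) parameterization (variance scaling and clamping omitted), the per-box arithmetic is roughly:

#include <cmath>

// Sketch of standard anchor-delta decoding; offset is 1.0 when pixel_offset
// is true, otherwise 0.0. Not the exact BoxCoder implementation.
void DecodeBoxSketch(const float anchor[4], const float delta[4],
                     float proposal[4], float offset) {
  const float aw = anchor[2] - anchor[0] + offset;   // anchor width
  const float ah = anchor[3] - anchor[1] + offset;   // anchor height
  const float acx = anchor[0] + 0.5f * aw;           // anchor center x
  const float acy = anchor[1] + 0.5f * ah;           // anchor center y

  const float cx = delta[0] * aw + acx;              // predicted center
  const float cy = delta[1] * ah + acy;
  const float w = std::exp(delta[2]) * aw;           // predicted size
  const float h = std::exp(delta[3]) * ah;

  proposal[0] = cx - 0.5f * w;                       // back to corner form
  proposal[1] = cy - 0.5f * h;
  proposal[2] = cx + 0.5f * w - offset;
  proposal[3] = cy + 0.5f * h - offset;
}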
8 changes: 4 additions & 4 deletions paddle/phi/kernels/funcs/detection/nms_util.h
@@ -127,20 +127,20 @@ static inline std::vector<std::pair<T, int>> GetSortedScoreIndex(
}

template <typename T>
-static inline DenseTensor VectorToTensor(const DeviceContext& ctx,
+static inline DenseTensor VectorToTensor(const DeviceContext& dev_ctx,
const std::vector<T>& selected_indices,
int selected_num) {
DenseTensor keep_nms;
keep_nms.Resize({selected_num});
-auto* keep_data = ctx.template Alloc<T>(&keep_nms);
+auto* keep_data = dev_ctx.template Alloc<T>(&keep_nms);
for (int i = 0; i < selected_num; ++i) {
keep_data[i] = selected_indices[i];
}
return keep_nms;
}

template <class T>
-DenseTensor NMS(const DeviceContext& ctx,
+DenseTensor NMS(const DeviceContext& dev_ctx,
DenseTensor* bbox,
DenseTensor* scores,
T nms_threshold,
@@ -182,7 +182,7 @@ DenseTensor NMS(const DeviceContext& ctx,
adaptive_threshold *= eta;
}
}
-return VectorToTensor(ctx, selected_indices, selected_num);
+return VectorToTensor(dev_ctx, selected_indices, selected_num);
}

} // namespace funcs
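For context on the function whose return statement changes in the last hunk: the CPU-side NMS walks (score, index) pairs already sorted in descending score order, drops any candidate whose IoU with an already selected box exceeds the current threshold, and scales the threshold by eta between selections when eta < 1. A compact sketch under those assumptions; JaccardOverlap here is an illustrative IoU helper, not the one defined in nms_util.h:

#include <algorithm>
#include <utility>
#include <vector>

// Illustrative IoU between two boxes in [x1, y1, x2, y2] layout.
inline float JaccardOverlap(const float *a, const float *b) {
  const float x1 = std::max(a[0], b[0]), y1 = std::max(a[1], b[1]);
  const float x2 = std::min(a[2], b[2]), y2 = std::min(a[3], b[3]);
  const float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
  const float area_a = (a[2] - a[0]) * (a[3] - a[1]);
  const float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  const float uni = area_a + area_b - inter;
  return uni > 0.0f ? inter / uni : 0.0f;
}

// Sketch of the greedy loop: boxes is a flat [n, 4] array, sorted holds
// (score, index) pairs in descending score order.
std::vector<int> GreedyNmsSketch(const float *boxes,
                                 const std::vector<std::pair<float, int>> &sorted,
                                 float nms_threshold, float eta) {
  std::vector<int> selected;
  float adaptive_threshold = nms_threshold;
  for (const auto &score_idx : sorted) {
    const int idx = score_idx.second;
    bool keep = true;
    for (const int kept : selected) {
      if (JaccardOverlap(boxes + idx * 4, boxes + kept * 4) > adaptive_threshold) {
        keep = false;
        break;
      }
    }
    if (keep) {
      selected.push_back(idx);
      // Assumed guard: only shrink the threshold once it is above 0.5.
      if (eta < 1.0f && adaptive_threshold > 0.5f) adaptive_threshold *= eta;
    }
  }
  return selected;
}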