|  | 
|  | 1 | +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. | 
|  | 2 | +
 | 
|  | 3 | +Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 4 | +you may not use this file except in compliance with the License. | 
|  | 5 | +You may obtain a copy of the License at | 
|  | 6 | +
 | 
|  | 7 | + http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 8 | +
 | 
|  | 9 | +Unless required by applicable law or agreed to in writing, software | 
|  | 10 | +distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 11 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 12 | +See the License for the specific language governing permissions and | 
|  | 13 | +limitations under the License. */ | 
|  | 14 | + | 
|  | 15 | +#pragma once | 
|  | 16 | +#include "paddle/framework/eigen.h" | 
|  | 17 | +#include "paddle/framework/op_registry.h" | 
|  | 18 | + | 
|  | 19 | +namespace paddle { | 
|  | 20 | +namespace operators { | 
|  | 21 | + | 
|  | 22 | +using Tensor = framework::Tensor; | 
|  | 23 | + | 
|  | 24 | +template <typename T, int MajorType = Eigen::RowMajor, | 
|  | 25 | + typename IndexType = Eigen::DenseIndex> | 
|  | 26 | +using EigenVector = framework::EigenVector<T, MajorType, IndexType>; | 
|  | 27 | + | 
|  | 28 | +template <typename Place, typename T> | 
|  | 29 | +class AucKernel : public framework::OpKernel<T> { | 
|  | 30 | + public: | 
|  | 31 | + void Compute(const framework::ExecutionContext& ctx) const override { | 
|  | 32 | + auto* inference = ctx.Input<Tensor>("Inference"); | 
|  | 33 | + auto* label = ctx.Input<Tensor>("Label"); | 
|  | 34 | + auto* auc = ctx.Output<Tensor>("AUC"); | 
|  | 35 | + | 
|  | 36 | + float* auc_data = auc->mutable_data<float>(ctx.GetPlace()); | 
|  | 37 | + | 
|  | 38 | + std::string curve = ctx.Attr<std::string>("curve"); | 
|  | 39 | + int num_thresholds = ctx.Attr<int>("num_thresholds"); | 
|  | 40 | + std::vector<float> thresholds_list; | 
|  | 41 | + thresholds_list.reserve(num_thresholds); | 
|  | 42 | + for (int i = 1; i < num_thresholds - 1; i++) { | 
|  | 43 | + thresholds_list[i] = (float)i / (num_thresholds - 1); | 
|  | 44 | + } | 
|  | 45 | + const float kEpsilon = 1e-7; | 
|  | 46 | + thresholds_list[0] = 0.0f - kEpsilon; | 
|  | 47 | + thresholds_list[num_thresholds - 1] = 1.0f + kEpsilon; | 
|  | 48 | + | 
|  | 49 | + size_t num_samples = inference->numel(); | 
|  | 50 | + | 
|  | 51 | + const T* inference_data = inference->data<T>(); | 
|  | 52 | + Tensor label_casted; | 
|  | 53 | + label_casted.Resize(label->dims()); | 
|  | 54 | + bool* label_casted_data = label_casted.mutable_data<bool>(ctx.GetPlace()); | 
|  | 55 | + | 
|  | 56 | + const int* label_data = label->data<int>(); | 
|  | 57 | + // cast label_data to bool | 
|  | 58 | + for (size_t i = 0; i < num_samples; i++) { | 
|  | 59 | + label_casted_data[i] = static_cast<bool>(label_data[i]); | 
|  | 60 | + } | 
|  | 61 | + | 
|  | 62 | + // Create local tensor for storing the curve: TP, FN, TN, FP | 
|  | 63 | + // TODO(typhoonzero): use eigen op to caculate these values. | 
|  | 64 | + Tensor true_positive, false_positive, true_negative, false_negative; | 
|  | 65 | + | 
|  | 66 | + true_positive.Resize({num_thresholds}); | 
|  | 67 | + false_negative.Resize({num_thresholds}); | 
|  | 68 | + true_negative.Resize({num_thresholds}); | 
|  | 69 | + false_positive.Resize({num_thresholds}); | 
|  | 70 | + | 
|  | 71 | + int* tp_data = true_positive.mutable_data<int>(ctx.GetPlace()); | 
|  | 72 | + int* fn_data = false_negative.mutable_data<int>(ctx.GetPlace()); | 
|  | 73 | + int* tn_data = true_negative.mutable_data<int>(ctx.GetPlace()); | 
|  | 74 | + int* fp_data = false_positive.mutable_data<int>(ctx.GetPlace()); | 
|  | 75 | + | 
|  | 76 | + for (int idx_thresh = 0; idx_thresh < num_thresholds; idx_thresh++) { | 
|  | 77 | + // caculate TP, FN, TN, FP for current thresh | 
|  | 78 | + int tp = 0, fn = 0, tn = 0, fp = 0; | 
|  | 79 | + for (size_t i = 0; i < num_samples; i++) { | 
|  | 80 | + if (label_casted_data[i]) { | 
|  | 81 | + if (inference_data[i] >= (thresholds_list[idx_thresh])) { | 
|  | 82 | + tp++; | 
|  | 83 | + } else { | 
|  | 84 | + fn++; | 
|  | 85 | + } | 
|  | 86 | + } else { | 
|  | 87 | + if (inference_data[i] >= (thresholds_list[idx_thresh])) { | 
|  | 88 | + fp++; | 
|  | 89 | + } else { | 
|  | 90 | + tn++; | 
|  | 91 | + } | 
|  | 92 | + } | 
|  | 93 | + } | 
|  | 94 | + // store rates | 
|  | 95 | + tp_data[idx_thresh] = tp; | 
|  | 96 | + fn_data[idx_thresh] = fn; | 
|  | 97 | + tn_data[idx_thresh] = tn; | 
|  | 98 | + fp_data[idx_thresh] = fp; | 
|  | 99 | + } | 
|  | 100 | + // epsilon to avoid divide by zero. | 
|  | 101 | + float epsilon = 1e-6; | 
|  | 102 | + // Riemann sum to caculate auc. | 
|  | 103 | + Tensor tp_rate, fp_rate, rec_rate; | 
|  | 104 | + tp_rate.Resize({num_thresholds}); | 
|  | 105 | + fp_rate.Resize({num_thresholds}); | 
|  | 106 | + rec_rate.Resize({num_thresholds}); | 
|  | 107 | + float* tp_rate_data = tp_rate.mutable_data<float>(ctx.GetPlace()); | 
|  | 108 | + float* fp_rate_data = fp_rate.mutable_data<float>(ctx.GetPlace()); | 
|  | 109 | + float* rec_rate_data = rec_rate.mutable_data<float>(ctx.GetPlace()); | 
|  | 110 | + for (int i = 0; i < num_thresholds; i++) { | 
|  | 111 | + tp_rate_data[i] = | 
|  | 112 | + ((float)tp_data[i] + epsilon) / (tp_data[i] + fn_data[i] + epsilon); | 
|  | 113 | + fp_rate_data[i] = (float)fp_data[i] / (fp_data[i] + tn_data[i] + epsilon); | 
|  | 114 | + rec_rate_data[i] = | 
|  | 115 | + ((float)tp_data[i] + epsilon) / (tp_data[i] + fp_data[i] + epsilon); | 
|  | 116 | + } | 
|  | 117 | + *auc_data = 0.0f; | 
|  | 118 | + if (curve == "ROC") { | 
|  | 119 | + for (int i = 0; i < num_thresholds - 1; i++) { | 
|  | 120 | + auto dx = fp_rate_data[i] - fp_rate_data[i + 1]; | 
|  | 121 | + auto y = (tp_rate_data[i] + tp_rate_data[i + 1]) / 2.0f; | 
|  | 122 | + *auc_data = *auc_data + dx * y; | 
|  | 123 | + } | 
|  | 124 | + } else if (curve == "PR") { | 
|  | 125 | + for (int i = 1; i < num_thresholds; i++) { | 
|  | 126 | + auto dx = tp_rate_data[i] - tp_rate_data[i - 1]; | 
|  | 127 | + auto y = (rec_rate_data[i] + rec_rate_data[i - 1]) / 2.0f; | 
|  | 128 | + *auc_data = *auc_data + dx * y; | 
|  | 129 | + } | 
|  | 130 | + } | 
|  | 131 | + } | 
|  | 132 | +}; | 
|  | 133 | + | 
|  | 134 | +} // namespace operators | 
|  | 135 | +} // namespace paddle | 
0 commit comments