rank_obj.cu

1. XGBoost training flow diagram


Figure: XGBoost training flow

2. Pairwise ranking with XGBoost


Figure: rank_obj.cu UML class diagram

Data types used by rank:pairwise
/*! \brief helper information in a list */ 

struct ListEntry {

 /*! \brief the prediction score of this entry */ 

 bst_float pred;  

 /*! \brief the actual label of the entry */ 

 bst_float label;  

 /*! \brief row index in the data matrix */ 

 unsigned rindex;  

 // constructor 

 ListEntry(bst_float pred, bst_float label, unsigned rindex)  

   : pred(pred), label(label), rindex(rindex) {}  

 // comparator by prediction 

 inline  static  bool CmpPred(const ListEntry &a, const ListEntry &b) {

   return a.pred > b.pred;

 }  

 // comparator by label 

 inline  static  bool CmpLabel(const ListEntry &a, const ListEntry &b) {

   return a.label > b.label;

 }  

};  

/*! \brief a pair in the lambda rank */  // fields: pos_index, neg_index, weight

struct LambdaPair {

 /*! \brief positive index: this is a position in the list */ 

 unsigned pos_index;  

 /*! \brief negative index: this is a position in the list */ 

 unsigned neg_index;  

 /*! \brief weight to be filled in */ 

 bst_float weight;  

 // constructor 

 LambdaPair(unsigned pos_index, unsigned neg_index)  

   : pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}  

 // constructor 

 LambdaPair(unsigned pos_index, unsigned neg_index, bst_float weight)  

   : pos_index(pos_index), neg_index(neg_index), weight(weight) {}  

};  
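To see how these two types cooperate, here is a minimal hypothetical sketch (not part of rank_obj.cu; it assumes the ListEntry and LambdaPair definitions above, with bst_float being float): the entries of one query group are sorted by prediction, and each LambdaPair stores positions into that sorted list, not raw row indices.

#include <algorithm>
#include <vector>

// Hypothetical illustration: one query group with two documents.
void BuildOnePair() {
  std::vector<ListEntry> lst;
  lst.emplace_back(/*pred=*/0.9f, /*label=*/0.0f, /*rindex=*/0);  // irrelevant doc, scored high
  lst.emplace_back(/*pred=*/0.2f, /*label=*/1.0f, /*rindex=*/1);  // relevant doc, scored low
  std::stable_sort(lst.begin(), lst.end(), ListEntry::CmpPred);   // sort by prediction, descending

  // After sorting, the relevant document sits at position 1 and the irrelevant
  // one at position 0, so the (pos_index, neg_index) pair is (1, 0).
  std::vector<LambdaPair> pairs;
  pairs.emplace_back(1u, 0u);  // weight defaults to 1.0f
}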

Pairwise loss computation
template <typename LambdaWeightComputerT>

class LambdaRankObj : public ObjFunction {

public:  

 void GetGradient(const HostDeviceVector<bst_float>& preds,

                  const MetaInfo& info,

                  int iter,

                  HostDeviceVector<GradientPair>* out_gpair) override {  

CHECK_EQ(preds.Size(), info.labels_.Size()) << "label size predict size not match";  

   // quick consistency when group is not available 

std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.Size());  

   const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;

    CHECK(gptr.size() != 0 && gptr.back() == info.labels_.Size())
        << "group structure not consistent with #rows" << ", "
        << "group pointer size: " << gptr.size() << ", "
        << "labels size: " << info.labels_.Size() << ", "
        << "group pointer back: " << (gptr.size() == 0 ? 0 : gptr.back());

#if defined(__CUDACC__) 

   // Check if we have a GPU assignment; else, revert back to CPU 

   auto device = tparam_->gpu_id;  

   if (device >= 0) {

     ComputeGradientsOnGPU(preds, info, iter, out_gpair, gptr);  

} else {

     // Revert back to CPU 

#endif 

     ComputeGradientsOnCPU(preds, info, iter, out_gpair, gptr);  

#if defined(__CUDACC__) 

   }  

#endif 

 }  
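As an aside on the group layout assumed here: group_ptr_ follows XGBoost's cumulative-offset convention, so a dataset with two query groups of 3 and 2 rows carries group_ptr_ = {0, 3, 5}, and group k spans rows [gptr[k], gptr[k+1]). When no group information is supplied, the fallback tgptr = {0, N} built above treats all N rows as a single group.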

private:  

 bst_float ComputeWeightNormalizationFactor(const MetaInfo& info,

                                            const std::vector<unsigned> &gptr) {

   const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);  

   bst_float sum_weights = 0;  

   for (bst_omp_uint k = 0; k < ngroup; ++k) {

     sum_weights += info.GetWeight(k);  

   }  

   return ngroup / sum_weights;

 }  
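ComputeWeightNormalizationFactor returns ngroup / Σ_k w_k, so that after multiplying by it the per-group query weights average to 1. For example, three groups with weights 1, 2 and 3 give a factor of 3 / 6 = 0.5, turning the effective weights into 0.5, 1.0 and 1.5.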
Figure: example of pair sampling

Figure: lambda summary

2.1 Computing gradients on the CPU

 void ComputeGradientsOnCPU(const HostDeviceVector<bst_float>& preds,

                            const MetaInfo& info,

                            int iter,

                            HostDeviceVector<GradientPair>* out_gpair,  

                            const std::vector<unsigned> &gptr) {

LOG(DEBUG) << "Computing " << LambdaWeightComputerT::Name() << " gradients on CPU.";  

   bst_float weight_normalization_factor = ComputeWeightNormalizationFactor(info, gptr);  

   const auto& preds_h = preds.HostVector();

   const auto& labels = info.labels_.HostVector();

   std::vector<GradientPair>& gpair = out_gpair->HostVector();  

   const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);  

   out_gpair->Resize(preds.Size());  

   #pragma omp parallel 

   {  

     // parallel construct, declare random number generator here, so that each 

     // thread use its own random number generator, seed by thread id and current iteration 

     std::minstd_rand rnd((iter + 1) * 1111);  

     std::vector<LambdaPair> pairs;  

     std::vector<ListEntry>  lst;  

     std::vector< std::pair<bst_float, unsigned> > rec;  

      // gpair holds the first- and second-order gradient for each row 

     #pragma omp for schedule(static) 

      for (bst_omp_uint k = 0; k < ngroup; ++k) {  // process each query group independently 

       lst.clear(); pairs.clear();

       for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {

         lst.emplace_back(preds_h[j], labels[j], j);  

         gpair[j] = GradientPair(0.0f, 0.0f);  

        }  // collect the list entries and zero-initialize their gradient pairs 

        std::stable_sort(lst.begin(), lst.end(), ListEntry::CmpPred);  // sort by prediction score, descending

       rec.resize(lst.size());  

       for (unsigned i = 0; i < lst.size(); ++i) {

         rec[i] = std::make_pair(lst[i].label, i);  

        }  // rec stores (label, position in the prediction-sorted list) 

       std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);  

        // enumerate buckets with the same label; for each item in a bucket, grab a partner sample from outside it 
        for (unsigned i = 0; i < rec.size(); ) {

         unsigned j = i + 1;  

          while (j < rec.size() && rec[j].first == rec[i].first) ++j;  // extend the bucket while labels are tied: items [i, j) share one label 

         // bucket in [i,j), get a sample outside bucket 

unsigned nleft = i, nright = static_cast<unsigned>(rec.size() - j);  

         if (nleft + nright != 0) {

           int nsample = param_.num_pairsample;

            while (nsample --) {  // draw num_pairsample partners for every item in this bucket 

              for (unsigned pid = i; pid < j; ++pid) {  // iterate over each item inside the tied-label bucket 

                unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);  // random index over the nleft + nright items outside the bucket 

                if (ridx < nleft) {
                  // partner lies to the left of the bucket (larger label), so it is the positive element
                  pairs.emplace_back(rec[ridx].second, rec[pid].second,
                      info.GetWeight(k) * weight_normalization_factor);
                } else {
                  // partner lies to the right: skip over the bucket by adding its length (j - i); the bucket item is the positive element
                  pairs.emplace_back(rec[pid].second, rec[ridx + j - i].second,
                      info.GetWeight(k) * weight_normalization_factor);
                }

             }  

           }  

         }  

         i = j;  

       }  

       // get lambda weight for the pairs 

       LambdaWeightComputerT::GetLambdaWeight(lst, &pairs);  

        // rescale each gradient and hessian so that the list has a constant weight 

       float scale = 1.0f / param_.num_pairsample;

       if (param_.fix_list_weight != 0.0f) {

         scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]);  

       }  

       for (auto & pair : pairs) {

         const ListEntry &pos = lst[pair.pos_index];

         const ListEntry &neg = lst[pair.neg_index];

         const bst_float w = pair.weight * scale;

         const  float eps = 1e-16f;

         bst_float p = common::Sigmoid(pos.pred - neg.pred);  

         bst_float g = p - 1.0f;  

         bst_float h = std::max(p * (1.0f - p), eps);  

         // accumulate gradient and hessian in both pid, and nid 

         gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h);  

         gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h);  

       }  

     }  

   }  

 }  

};  
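The inner loop above is the whole of the pairwise loss: for a pair (pos, neg) it models P(pos ranked above neg) with a sigmoid of the score difference and accumulates first- and second-order gradients on both rows. A minimal standalone sketch of that update (not XGBoost code; it assumes common::Sigmoid(x) = 1 / (1 + exp(-x)) and bst_float = float):

#include <algorithm>
#include <cmath>

// Hypothetical restatement of the per-pair update in ComputeGradientsOnCPU.
inline void AccumulatePairGradient(float pred_pos, float pred_neg, float w,
                                   float *grad_pos, float *hess_pos,
                                   float *grad_neg, float *hess_neg) {
  const float eps = 1e-16f;
  float p = 1.0f / (1.0f + std::exp(-(pred_pos - pred_neg)));  // common::Sigmoid
  float g = p - 1.0f;                       // gradient of -log(sigmoid(s_pos - s_neg)) w.r.t. s_pos
  float h = std::max(p * (1.0f - p), eps);  // second derivative, clamped away from zero
  *grad_pos += g * w;   *hess_pos += 2.0f * w * h;
  *grad_neg += -g * w;  *hess_neg += 2.0f * w * h;
}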

class PairwiseLambdaWeightComputer {

public:  

 /*!

  * \brief get lambda weight for existing pairs - for pairwise objective

  * \param list a list that is sorted by pred score

  * \param io_pairs record of pairs, containing the pairs to fill in weights

  */ 

 static  void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,

                             std::vector<LambdaPair> *io_pairs) {}  

 static  char  const* Name() {  

   return  "rank:pairwise";  

 }  

#if defined(__CUDACC__) 

 PairwiseLambdaWeightComputer(const bst_float *dpreds,

                              const bst_float *dlabels,

                              const dh::SegmentSorter<float> &segment_label_sorter) {}  

 class PairwiseLambdaWeightMultiplier {

  public:  

   // Adjust the items weight by this value 

__device__ __forceinline__ bst_float GetWeight(uint32_t gidx, int pidx, int nidx) const {

     return 1.0f;

   }  

 };  

 inline  const PairwiseLambdaWeightMultiplier GetWeightMultiplier() const {

   return {};

 }  

#endif 

};  
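For rank:pairwise, GetLambdaWeight is deliberately a no-op: every sampled pair keeps the weight it was constructed with. The template parameter of LambdaRankObj is the extension point where the other ranking objectives in this file plug in a non-trivial weighting. As a purely hypothetical sketch (not in rank_obj.cu), a computer that up-weights pairs by the label gap of their endpoints could look like:

// Hypothetical example only: weight each pair by the label difference of its endpoints.
class LabelGapLambdaWeightComputer {
 public:
  static void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
                              std::vector<LambdaPair> *io_pairs) {
    for (auto &pair : *io_pairs) {
      pair.weight *= sorted_list[pair.pos_index].label - sorted_list[pair.neg_index].label;
    }
  }
  static char const* Name() { return "rank:labelgap (hypothetical)"; }
};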

Figure: Class Diagram 1

2.2 Computing gradients on the GPU

As GetGradient above shows, this path is taken whenever a GPU is assigned (tparam_->gpu_id >= 0); otherwise the CPU implementation from 2.1 is used.

#if defined(__CUDACC__) 

 void ComputeGradientsOnGPU(const HostDeviceVector<bst_float>& preds,

                            const MetaInfo& info,

                            int iter,

                            HostDeviceVector<GradientPair>* out_gpair,  

                            const std::vector<unsigned> &gptr) {

LOG(DEBUG) << "Computing " << LambdaWeightComputerT::Name() << " gradients on GPU.";  

   auto device = tparam_->gpu_id;  

   dh::safe_cuda(cudaSetDevice(device));  

   bst_float weight_normalization_factor = ComputeWeightNormalizationFactor(info, gptr);  

   // Set the device ID and copy them to the device 

   out_gpair->SetDevice(device);  

   info.labels_.SetDevice(device);  

   preds.SetDevice(device);  

   info.weights_.SetDevice(device);  

   out_gpair->Resize(preds.Size());  

   auto d_preds = preds.ConstDevicePointer();  

   auto d_gpair = out_gpair->DevicePointer();  

   auto d_labels = info.labels_.ConstDevicePointer();  

   SortedLabelList slist(param_);  

   // Sort the labels within the groups on the device 

   slist.Sort(info.labels_, gptr);  

   // Initialize the gradients next 

   out_gpair->Fill(GradientPair(0.0f, 0.0f));  

   // Finally, compute the gradients 

   slist.ComputeGradients<LambdaWeightComputerT>  

     (d_preds, d_labels, info.weights_, iter, d_gpair, weight_normalization_factor);  

 }  

#endif 
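Note how the launch size is derived: the kernel below runs with niter = param_.num_pairsample * total_items work-items, so each document of each group is handled by num_pairsample threads per boosting iteration. With num_pairsample at its default of 1 and a group of 100 documents, 100 threads each sample one partner, mirroring the nsample loop of the CPU path.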

#if defined(__CUDACC__) 

class SortedLabelList : dh::SegmentSorter<float> {  

private:  

 const LambdaRankParam &param_;  // Objective configuration 

public:  

 explicit SortedLabelList(const LambdaRankParam &param)

   : param_(param) {}  

 // Sort the labels that are grouped by 'groups' 

 void Sort(const HostDeviceVector<bst_float> &dlabels, const std::vector<uint32_t> &groups) {

   this->SortItems(dlabels.ConstDevicePointer(), dlabels.Size(), groups);  

 }  
Figure: Class Diagram 4
 // This kernel can only run *after* the kernel in sort is completed, as they 

 // use the default stream 

 template <typename LambdaWeightComputerT>

 void ComputeGradients(const bst_float *dpreds,// Unsorted predictions 

                       const bst_float *dlabels,// Unsorted labels 

                       const HostDeviceVector<bst_float> &weights,

                       int iter,

                       GradientPair *out_gpair,  

                       float weight_normalization_factor) {

   // Group info on device 

   const auto &dgroups = this->GetGroupsSpan();  

uint32_t ngroups = this->GetNumGroups() + 1;  

uint32_t total_items = this->GetNumItems();  

   uint32_t niter = param_.num_pairsample * total_items;  

   float fix_list_weight = param_.fix_list_weight;

   const auto &original_pos = this->GetOriginalPositionsSpan();  

   uint32_t num_weights = weights.Size();  

   auto dweights = num_weights ? weights.ConstDevicePointer() : nullptr;  

   const auto &sorted_labels = this->GetItemsSpan();  

   // This is used to adjust the weight of different elements based on the different ranking 

   // objective function policies 

   LambdaWeightComputerT weight_computer(dpreds, dlabels, *this);  

   auto wmultiplier = weight_computer.GetWeightMultiplier();  

   int device_id = -1;

   dh::safe_cuda(cudaGetDevice(&device_id));  

   // For each instance in the group, compute the gradient pair concurrently 

   dh::LaunchN(device_id, niter, nullptr, [=] __device__(uint32_t idx) {  

     // First, determine the group 'idx' belongs to 

     uint32_t item_idx = idx % total_items;  

     uint32_t group_idx =  

         thrust::upper_bound(thrust::seq, dgroups.begin(),  

                             dgroups.begin() + ngroups, item_idx) -  

         dgroups.begin();  

     // Span of this group within the larger labels/predictions sorted tuple 

     uint32_t group_begin = dgroups[group_idx - 1];  

     uint32_t group_end = dgroups[group_idx];  

     uint32_t total_group_items = group_end - group_begin;  

     // Are the labels diverse enough? If they are all the same, then there is nothing to pick 

     // from another group - bail sooner 

     if (sorted_labels[group_begin] == sorted_labels[group_end - 1]) return;  

     // Find the number of labels less than and greater than the current label 

     // at the sorted index position item_idx 

     uint32_t nleft  = CountNumItemsToTheLeftOf(  

       sorted_labels.data() + group_begin, item_idx - group_begin + 1, sorted_labels[item_idx]);  

     uint32_t nright = CountNumItemsToTheRightOf(  

       sorted_labels.data() + item_idx, group_end - item_idx, sorted_labels[item_idx]);  

     // Create a minstd_rand object to act as our source of randomness 

     thrust::minstd_rand rng((iter + 1) * 1111);  

     rng.discard(((idx / total_items) * total_group_items) + item_idx - group_begin);  

     // Create a uniform_int_distribution to produce a sample from outside of the 

     // present label group 

     thrust::uniform_int_distribution<int> dist(0, nleft + nright - 1);  

     int sample = dist(rng);

     int pos_idx = -1;// Bigger label 

     int neg_idx = -1;// Smaller label 

     // Are we picking a sample to the left/right of the current group? 

     if (sample < nleft) {

       // Go left 

       pos_idx = sample + group_begin;  

       neg_idx = item_idx;  

} else {

       pos_idx = item_idx;  

       uint32_t items_in_group = total_group_items - nleft - nright;  

       neg_idx = sample + items_in_group + group_begin;  

     }  

     // Compute and assign the gradients now 

     const  float eps = 1e-16f;

     bst_float p = common::Sigmoid(dpreds[original_pos[pos_idx]] - dpreds[original_pos[neg_idx]]);  

     bst_float g = p - 1.0f;  

     bst_float h = thrust::max(p * (1.0f - p), eps);  

     // Rescale each gradient and hessian so that the group has a weighted constant 

     float scale = __frcp_ru(niter / total_items);

     if (fix_list_weight != 0.0f) {

       scale *= fix_list_weight / total_group_items;  

     }  

     float weight = num_weights ? dweights[group_idx - 1] : 1.0f;

     weight *= weight_normalization_factor;  

     weight *= wmultiplier.GetWeight(group_idx - 1, pos_idx, neg_idx);  

     weight *= scale;  

     // Accumulate gradient and hessian in both positive and negative indices 

     const GradientPair in_pos_gpair(g * weight, 2.0f * weight * h);

     dh::AtomicAddGpair(&out_gpair[original_pos[pos_idx]], in_pos_gpair);  

     const GradientPair in_neg_gpair(-g * weight, 2.0f * weight * h);

     dh::AtomicAddGpair(&out_gpair[original_pos[neg_idx]], in_neg_gpair);  

   });  

   // Wait until the computations done by the kernel is complete 

   dh::safe_cuda(cudaStreamSynchronize(nullptr));  

 }  

};  

#endif 
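The trickiest part of the kernel is mapping a flat index back to its query group: item_idx = idx % total_items, and the group is found with a binary search (upper_bound) over the group-pointer array. A host-side sketch of that lookup, assuming group pointers {0, 3, 5} (two groups of 3 and 2 items):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Group boundaries as exposed by GetGroupsSpan(): two groups of 3 and 2 items.
  std::vector<uint32_t> dgroups = {0, 3, 5};
  uint32_t item_idx = 4;  // fifth item overall, i.e. the second item of group 2
  uint32_t group_idx = static_cast<uint32_t>(
      std::upper_bound(dgroups.begin(), dgroups.end(), item_idx) - dgroups.begin());
  uint32_t group_begin = dgroups[group_idx - 1];
  uint32_t group_end = dgroups[group_idx];
  std::printf("group %u spans [%u, %u)\n", group_idx, group_begin, group_end);  // group 2 spans [3, 5)
  return 0;
}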
