C++实现pipeline模式

Pipeline 模式

pipeline模式也被称为管道模式或者流水线模式，用于解耦对数据的不同处理阶段：利用管道将各个处理节点连接起来，每个节点负责做不同的事情，等数据流过了整个管道，处理也就完成了。

设计思路

通过队列连接节点，当前节点的输出是下一个节点的输入。
将每个节点需要的数据以及处理后的结果全都放在一个结构体里面，流经不同的节点取出对应需要的数据进行处理。
队列中存储的是数据的指针，用来减少数据的复制拷贝操作。
设计一个节点的基础类，需要自定义节点时继承该类，实现自定义的功能。

代码

数据基类
数据基类定义了clone函数，用于应对需要将数据复制一份的需求
meta.hpp

#ifndef META_HPP__
#define META_HPP__

#include <memory>  // std::shared_ptr, used by Meta::clone()

/// Abstract base for data objects that can be cloned.
///
/// @tparam T  Type the clone is returned as; clone() yields a
///            std::shared_ptr<T>.
template<typename T>
struct Meta {
    /// Virtual so that objects can be destroyed through a Meta<T> pointer.
    virtual ~Meta() = default;

    /// Returns a copy of this object. What "copy" means (deep or shallow) is
    /// decided by the implementing type.
    virtual std::shared_ptr<T> clone() const = 0;
};

#endif

计算处理时间
用于计算当前作用域处理完成需要的时间
timer.hpp

#ifndef TIMER_HPP__
#define TIMER_HPP__


#include <string>
#include <iostream>
#include "logger/simple-logger.hpp"

/// Scope timer: captures a steady-clock timestamp when constructed and, when
/// destroyed, logs the elapsed time in whole milliseconds together with the
/// label it was given.
///
/// NOTE(review): this class uses std::chrono, but the include list visible
/// above it only names <string>, <iostream> and the logger header — confirm
/// that <chrono> is reachable wherever this header is included.
class Timer {
public:
    /// @param nodeName  Label written next to the measurement in the log line.
    Timer(const std::string& nodeName)
        : node_name_(nodeName), start_(Clock::now()) { }

    /// Logs "Node : <label> time : <elapsed ms>" through the INFO macro.
    ~Timer() {
        auto elapsed = Clock::now() - start_;
        auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
        INFO("Node : ", node_name_, " time : ", elapsed_ms);
    }

private:
    using Clock = std::chrono::steady_clock;

    std::string node_name_;     // label for the log line
    Clock::time_point start_;   // moment of construction
};


#endif

线程安全队列
threadSafeQueue.hpp

#ifndef THREADSAFEQUEUE_HPP
#define THREADSAFEQUEUE_HPP

#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>
#include <utility>
#include <vector>
// #include "logger/simple-logger.hpp"

/// FIFO queue of std::shared_ptr<T> guarded by a mutex, with blocking pops.
///
/// Producers call push()/pushBatch(); consumers call pop()/popBatch(), which
/// block on a condition variable until at least one element is available.
/// Elements are stored as shared pointers, so enqueuing and dequeuing never
/// copies the pointed-to T.
template <typename T>
class ThreadSafeQueue{

public:
    /// Appends one element and wakes a single waiting consumer.
    void push(std::shared_ptr<T> item) 
    {
        {
            std::lock_guard<std::mutex> lock(mutex_);
            queue_.push(std::move(item));
        }
        // Notify after releasing the lock so the woken thread can take it at once.
        cond_.notify_one();
    }

    /// Appends every element of `items` in order under a single lock.
    /// An empty batch is a no-op.
    void pushBatch(std::vector<std::shared_ptr<T>> items) 
    {
        if (items.empty())
        {
            return;
        }
        {
            std::lock_guard<std::mutex> lock(mutex_);
            for (auto& item : items)
            {
                // `items` is our own by-value copy, so its elements can be moved out.
                queue_.push(std::move(item));
            }
        }
        // Several elements may have arrived, so more than one waiter may be able
        // to proceed; waking only one could leave others asleep with data queued.
        cond_.notify_all();
    }

    /// Blocks until the queue is non-empty, then removes and returns the
    /// front element.
    ///
    /// The parameter is unused; it is retained (now defaulted) only so that
    /// existing call sites passing an argument keep compiling.
    std::shared_ptr<T> pop(std::shared_ptr<T> /*item*/ = nullptr) 
    {
        std::unique_lock<std::mutex> lock(mutex_);
        cond_.wait(lock, [this] { return !queue_.empty(); });
        std::shared_ptr<T> value = std::move(queue_.front());
        queue_.pop();  // actually dequeue; otherwise the same element is returned forever
        return value;
    }

    /// Blocks until the queue is non-empty, then removes and returns up to
    /// `batch_size` elements in FIFO order. The result can be shorter than
    /// `batch_size` when fewer elements are queued, and is empty when
    /// `batch_size` is 0.
    std::vector<std::shared_ptr<T>> popBatch(size_t batch_size) 
    {
        std::unique_lock<std::mutex> lock(mutex_);
        cond_.wait(lock, [this] { return !queue_.empty(); });

        std::vector<std::shared_ptr<T>> batch;
        batch.reserve(batch_size < queue_.size() ? batch_size : queue_.size());
        while (!queue_.empty() && batch.size() < batch_size) 
        {
            batch.push_back(std::move(queue_.front()));
            queue_.pop();
        }
        return batch;
    }

private:
    std::queue<std::shared_ptr<T>> queue_;  // pending elements, oldest at the front
    std::mutex mutex_;                      // guards queue_
    std::condition_variable cond_;          // signalled when elements are added
};

#endif

节点类
node.hpp

#ifndef NODE_HPP__
#define NODE_HPP__

#include <iostream>
#include <memory>
#include <string>
#include <queue>
#include <vector>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <atomic>
#include <functional>
#include <chrono>

#include "node/common/threadSafeQueue.hpp"
#include "node/common/timer.hpp"


/// Base class for one pipeline stage.
///
/// A node owns a worker thread that repeatedly takes a batch of items from
/// its input queue, passes the batch to handleMeta(), and forwards the same
/// items to its output queue. Queues are created by connectTo(): the queue
/// becomes this node's output and the next node's input.
///
/// Null pointers in the input queue are reserved as wake-up markers: stop()
/// enqueues one so that a worker blocked waiting for input can observe the
/// stop request. The default worker() discards them; a subclass that
/// overrides worker() must tolerate them as well.
///
/// Lifetime note: ~Node() calls stop(), but by then the derived part of the
/// object has already been destroyed. A derived class whose handleMeta()
/// touches its own members should call stop() from its own destructor.
///
/// @tparam T  Element type carried (by std::shared_ptr) through the queues.
template<typename T>
class Node {
public:
    /// @param name       Label used for the per-batch timing log.
    /// @param batchSize  Maximum number of items requested per batch; values
    ///                   below 1 are treated as 1 (a zero batch would make the
    ///                   worker loop without ever consuming anything).
    Node(const std::string& name, int batchSize = 1) 
        : name_(name),
          running_(false),
          batchSize_(batchSize > 0 ? static_cast<size_t>(batchSize) : 1) {}
    virtual ~Node() { stop(); }

    /// Links this node to `nextNode` with a fresh queue: this node's output,
    /// `nextNode`'s input. Any previously set queues on those two ends are
    /// replaced. A null `nextNode` is ignored.
    void connectTo(const std::shared_ptr<Node<T>>& nextNode) {
        if (!nextNode)
        {
            return;
        }
        auto queue = std::make_shared<ThreadSafeQueue<T>>();
        outputQueue_ = queue;
        nextNode->inputQueue_ = queue;
    }

    /// Launches the worker thread. Calling start() on a node that is already
    /// running is a no-op (assigning to a joinable std::thread would
    /// terminate the program).
    void start() 
    {
        if (running_.exchange(true))
        {
            return;
        }
        workerThread_ = std::thread(&Node::worker, this);
    }

    /// Requests the worker to finish and waits for it to exit.
    void stop() 
    {
        const bool wasRunning = running_.exchange(false);
        if (wasRunning && inputQueue_)
        {
            // The worker may be blocked waiting for input; a null marker
            // wakes it so it can re-check running_ instead of hanging join().
            inputQueue_->push(nullptr);
        }
        if (workerThread_.joinable()) 
        {
            workerThread_.join();
        }
    }

protected:
    /// Processes one non-empty batch of non-null items. Implemented by subclasses.
    virtual void handleMeta(const std::vector<std::shared_ptr<T>>& metas) = 0;

    /// Worker loop; subclasses may replace it entirely (e.g. a source node
    /// that has no input queue).
    virtual void worker() 
    {
        if (!inputQueue_)
        {
            // Nothing to consume from; the default loop cannot run.
            return;
        }
        while (running_) 
        {
            auto popped = inputQueue_->popBatch(batchSize_);

            // Strip the null wake-up markers enqueued by stop().
            std::vector<std::shared_ptr<T>> metas;
            metas.reserve(popped.size());
            for (const auto& meta : popped)
            {
                if (meta)
                {
                    metas.push_back(meta);
                }
            }
            if (metas.empty())
            {
                continue;  // only markers were received; re-check running_
            }

            {
                // Time only the processing step, not the wait for input.
                Timer timer(name_);
                handleMeta(metas);
            }
            if (outputQueue_)
            {
                // A node without a successor simply consumes its input.
                outputQueue_->pushBatch(metas);
            }
        }
    }

    std::shared_ptr<ThreadSafeQueue<T>> inputQueue_;   // input queue (set by the upstream node's connectTo)
    std::shared_ptr<ThreadSafeQueue<T>> outputQueue_;  // output queue (set by this node's connectTo)
    std::string name_;      // node name
    std::thread workerThread_;
    std::atomic<bool> running_;
    size_t batchSize_ = 1;  // maximum number of items taken per batch
};


#endif // namespace NODE_HPP__

C++实现pipeline模式

Pipeline 模式

设计思路

代码

推荐阅读更多精彩内容