Goroutine 随笔

很早之前在lua中实现过一版协程，lua的栈是虚拟的，当要切换协程时虚拟栈不需要退栈，只需要从C的栈（物理栈）退出。恢复也简单，直接在lua的虚拟栈压入返回值，lua就可以继续运行了。这版协程比较简单，虽然不支持lua的栈与C栈交替存在，但当时也没这种需求，运行的也算问题，没什么特别的Bug。

最近在看Go，开始以为Go的routine也是虚拟栈，很快发现不对，Go是编译语言，不是解释语言，Go的栈是物理栈。当时就想物理栈退栈了怎么再回到现场的，搜了一下资料才发现自己秀逗了。物理栈不需要退栈，切换时保存好指令寄存器和栈指针寄存器，恢复的时候再把3个寄存器一恢复就行了。反而比虚拟栈还要简单些，顺着原理自己简单的实现了一把，虽然过程有点曲折，效果还算满意的。

首先是P，只包含了一个G的队列

#include <queue>

class G;
class P
{
public:
    void addG(G* g)
    {
        gs.push(g);
    }

    G* popG()
    {
        if (gs.size() == 0)
        {
            return nullptr;
        }

        G* g = gs.front();
        gs.pop();
        return g;
    }

private:
    std::queue<G*> gs;
};

G 用ID来模拟线程存储数据，esp_变量保存栈顶指针

//G.H
typedef void(__stdcall *routine)(int);

namespace runtime
{
    void yield();
}

class M;
class G
{
public:
    G(routine fun, int arg)
    {
        id = _id++;

        //16MB的栈，开始申请了4KB结果溢出了 各种Bug 以为栈指针操作出了问题
        const int stackSize = 16 * 1024 * 1024;

        //构建堆栈
        BYTE* mem = (BYTE*)malloc(stackSize + 1024); //多申请1KB
        DWORD* p = (DWORD*)(mem + stackSize);

        *p-- = (DWORD)mem;
        *p-- = (DWORD)this;
        *p-- = arg;
        *p-- = (DWORD)fun;
        *p-- = (DWORD)(mem + stackSize); //EBP
        *p = 0; //标识位 0表示routine还没有运行栈，栈中都是数据
        esp_ = (DWORD)p;

        dead_ = false;
    }

private:
    G() //g0的构造函数
    {
        id = _id++;
        dead_ = false;

        //g0不需要栈，用系统的栈
    }

private:
    void Attach(M* m)
    {
        m_ = m;
        if (m != nullptr)
        {
            current_ = this;
        }
        else
        {
            current_ = nullptr;
        }
    }

public:
    static G* GetCurrent()
    {
        return current_;
    }

    int getId()
    {
        return id;
    }

private:
    int id;
    bool dead_;
    M* m_;

    DWORD esp_;
    friend class M;

private:
    static thread_local G* current_;
    static int _id;
};

//G.cpp
thread_local G* G::current_;
int G::_id;

void runtime::yield()
{
    M::GetCurrent()->yieldCurrentG();
}

最后是M，G的切换，没实现调度

//M.h
class M
{
public:
    M(P* p);

    ~M();

public:
    void switchG();
    void yieldCurrentG();
    void addG(G* g);

    static M* GetCurrent()
    {
        return current_;
    }

private:
    void freeG(G* g, PVOID pStackMem);

private:
    G* currentG_;
    G* g0;
    P* p_;


    static thread_local M* current_;
};

//M.cpp
thread_local M* M::current_;

M::M(P* p)
{
    p_ = p;

    g0 = new G();
    currentG_ = g0;
    g0->Attach(this);

    current_ = this;
}

M::~M()
{
    delete g0;
    current_ = nullptr;
}

void M::addG(G * g)
{
    p_->addG(g);
}

__declspec(naked) void resumeG()
{
    __asm
    {
        pop edi //return address
        pop ecx //this M的指针
        pop eax //参数
        push edi
        push ecx

        mov esp, eax
        sub esp, 8  //上上行 push了一个EDI，一个ECX
        add eax, 36
        mov ebp, eax //比pushad大点
        mov edx, ecx
        pop ecx //this
        pop edi //return address
        pop eax  //flag

        cmp eax, 0
        jne RESUME

        //新routine 初始化调用栈
        pop ebp
        pop eax
        pop esi
        push edx //保留this指针
        push esi
        call eax
        pop ecx //this指针
        call M::freeG  //freeG不会返回了

    RESUME:
        pop eax
        mov esp, eax
        add eax, 36
        mov ebp, eax //比pushad大点
        pop eax
        mov ebp, eax
        push edi
        ret
    }
}

void M::freeG(G* g, PVOID pStackMem)
{
    if (g == currentG_)
    {
        g->esp_ = (DWORD)pStackMem;
        g->dead_ = true;

        //切换到g0释放内存
        DWORD esp_ = g0->esp_;
        __asm
        {
            mov eax, esp_
            push eax
            mov ecx, this
            push ecx
            call resumeG
        }
    }
}

//g0堆栈
void M::switchG()
{
    DWORD esp_ = 0;

    while (true)
    {
        currentG_ = p_->popG();
        if (currentG_ == nullptr)
        {
            //都运行完了
            break;
        }

        __asm
        {
            mov eax, ebp
            push eax
            mov eax, esp
            push eax
            mov eax, 1
            push eax
            mov esp_, esp
        }
        g0->esp_ = esp_;
        g0->Attach(nullptr);
        currentG_->Attach(this);

        esp_ = currentG_->esp_;
        __asm
        {
            mov eax, esp_
            push eax
            mov eax, this
            push eax
            call resumeG
        }

        if (currentG_->dead_)
        {
            //此时在g0，但currentG不是g0
            free((void*)currentG_->esp_);
            delete currentG_;
            currentG_ = nullptr;
        }
    }
}

void M::yieldCurrentG()
{
    //保存现场
    DWORD esp_ = 0;
    G* g = currentG_;

    __asm
    {
        mov eax, dword ptr[ebp - 4]
        mov eax, ebp
        push eax
        mov eax, esp
        push eax
        mov eax, 1
        push eax
        mov esp_, esp
    }
    g->esp_ = esp_;
    g->Attach(nullptr);
    p_->addG(g);

    //切换到g0
    g0->Attach(this);
    esp_ = g0->esp_;

    __asm
    {
        mov eax, esp_
        push eax
        mov eax, this
        push eax
        call resumeG
    }

    //从其他routine回来了
    currentG_ = g;
    g->Attach(this);
}

所有的G都会从 resumeG 开始，resumeG 切换堆栈，进入G的routine函数执行，由于没实现调度，所以需要代码中调用runtime::yield来切换routine。runtime::yield会调用M::yieldCurrentG，M::yieldCurrentG保存现场后切换到g0，g0选择下一个routine，再通过resumeG切换堆栈和指令寄存器，yieldCurrentG得以继续执行。routine函数执行完成后会回到最初的resumeG， resumeG再调用M::freeG来销毁G，至此routine完结销毁。

//测试代码
void __stdcall routine1(int arg)
{
    G* g = G::GetCurrent();
    printf("fun=routine1, id=%d, arg=%d\n", g->getId(), arg);
    runtime::yield();
    printf("fun=routine1, id=%d, arg=%d\n", g->getId(), arg);
}

void __stdcall routine2(int arg)
{
    G* g = G::GetCurrent();
    printf("fun=routine2, id=%d, arg=%d\n", g->getId(), arg);
    runtime::yield();
    printf("fun=routine2, id=%d, arg=%d\n", g->getId(), arg);
}

int main()
{
    P p;
    M m(&p);

    m.addG(new G(&routine1, 1));
    m.addG(new G(&routine2, 1));
    m.addG(new G(&routine1, 2));
    m.addG(new G(&routine2, 2));

    m.switchG();

    return 0;
}

交替运行效果图

最关键就是M默认的g0持有的是原生的系统栈，对其他routine的栈的操作都要在g0中进行。避免内存溢出，堆栈破坏。

Goroutine 随笔

友情链接更多精彩内容