Overview
There are already many Python implementations of Jump Jump (跳一跳) assistants online, some based on template matching and others on end-to-end deep learning, and all of them are impressive. But none uses the algorithm I had in mind: scan each row for pixels that differ from the background and record the extreme (leftmost/rightmost) x values; when an extreme stays unchanged for three consecutive rows, that row gives the y coordinate of the center, while the x coordinate is the mean x of the non-background pixels in the first such row. So I wrote the code to implement this idea myself.
The main algorithm is as follows:
1. Locate the game piece with template matching.
2. Based on the piece's coordinates, crop the region that contains the target block, which is used to detect the center of the next jump.
3. Use a Cython subroutine to find the center of the cropped region: the x coordinate is the mean x of the pixels in the first row that differ from the background color, and the y coordinate is the row at which the extreme x values of the non-background pixels stop changing for three consecutive rows. While scanning, the routine also tracks the center of any RGB = (245, 245, 245) pixel region, which is used to correct detection errors (shown as Out in the figure).
4. Finally, compute the pixel distance between the detected piece and block centers and derive the press time. The press time initially comes from a simple linear model (a sketch follows this list); jumps that land exactly on the center are logged with their distance and time, and a KNN regressor then predicts the time for the next jump.
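The simple linear model used for bootstrapping is not shown in the code below, so here is a minimal sketch of what it could look like; the slope and intercept are hypothetical placeholders that would need tuning for a specific device:

def linear_press_time(distance_px, slope=1.35, intercept=0.0):
    # Hypothetical bootstrap model: press time (ms) grows linearly with
    # pixel distance. Tune slope/intercept for your own device, then
    # switch to the KNN regressor once enough samples are logged.
    return int(slope * distance_px + intercept)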
Code
First comes the pixel-level access function written in Cython, saved as fastGetLocation.pyx; note that the suffix is .pyx rather than .py, and the name must match the module imported below.
import numpy as np
cimport numpy as np
cimport cython

DTYPE = np.uint8
ctypedef np.uint8_t DTYPE_t

cdef unsigned char absSub(unsigned char v1, unsigned char v2):
    # Absolute difference of two unsigned bytes without overflow.
    return v1 - v2 if v1 > v2 else v2 - v1

@cython.boundscheck(False)
@cython.wraparound(False)
def chessDetect(np.ndarray[DTYPE_t, ndim=3] image):
    cdef int height, width, i, j, xmin, xmax, prexmin=0, prexmax=0, x=0, y=0, rcount=0, lcount=0, xcount=0, xsum=0, whitex=0, whitey=0, whitecount=0, ai, aj
    cdef bint Foundx=False, Foundxmin
    cdef unsigned int diff
    height = image.shape[0]
    width = image.shape[1]
    cdef np.ndarray[DTYPE_t, ndim=2] out = np.zeros([height, width], dtype=DTYPE)
    cdef np.ndarray[DTYPE_t, ndim=1] backgroundColor, t
    # Take the top-left pixel as the background color.
    backgroundColor = image[0, 0]
    for i in range(height):
        xmin = 0
        xmax = 0
        Foundxmin = False
        for j in range(1, width):
            t = image[i, j]
            # Accumulate the RGB=(245, 245, 245) region used for error correction.
            if t[0] == 245 and t[1] == 245 and t[2] == 245:
                whitex += j
                whitey += i
                whitecount += 1
            diff = absSub(t[0], backgroundColor[0]) + absSub(t[1], backgroundColor[1]) + absSub(t[2], backgroundColor[2])
            if diff > 30:
                out[i, j] = 255
                if not Foundx:
                    # Still on the first non-background row: accumulate x values.
                    xsum += j
                    xcount += 1
                if not Foundxmin:
                    xmin = j
                    Foundxmin = True
                xmax = j
        if xcount != 0:
            # x is the mean x of the first row that differs from the background.
            x = xsum // xcount
            Foundx = True
        # When an extreme x value stops changing across rows and the row is
        # wide enough, we are passing the widest point of the block.
        if (xmin == prexmin or xmax == prexmax) and Foundx and (xmax - x > 50 or x - xmin > 50):
            # print(xmax, xmin, xmax-xmin)
            if xmin == prexmin and xmax == prexmax:
                lcount += 1
            if xmax == prexmax:
                rcount += 1
            if lcount >= 2 or rcount >= 6:
                y = i
                break
        prexmin = xmin
        prexmax = xmax
    # Keep scanning a few rows below the detected center so the white-region
    # statistics are not cut off.
    for ai in range(i, min(height, i + 20)):
        for aj in range(1, width):
            t = image[ai, aj]
            if t[0] == 245 and t[1] == 245 and t[2] == 245:
                whitex += aj
                whitey += ai
                whitecount += 1
            diff = absSub(t[0], backgroundColor[0]) + absSub(t[1], backgroundColor[1]) + absSub(t[2], backgroundColor[2])
            if diff > 30:
                out[ai, aj] = 255
    if whitecount != 0:
        # print("Here", whitex, whitey, whitecount)
        whitex = int(whitex / whitecount)
        whitey = int(whitey / whitecount)
    return out, x, y, whitex, whitey
For details on how Cython interacts with numpy, see the Cython documentation. Then create setup.py in the same directory:
from distutils.core import setup, Extension
from Cython.Build import cythonize
import numpy
setup(ext_modules=cythonize("fastGetLocation.pyx"), include_dirs=[numpy.get_include()])
Then, on the command line, run
python setup.py build_ext --inplace
to compile the Cython source into C and build a library importable from Python; with that, pixel-level access via Cython is done. A quick performance comparison (on an Intel Core i7 3630QM): processing one image with plain Python access to the numpy array takes about 8 seconds; with Cython it takes only about 400 ms, roughly a 20x speedup; a C++ implementation with OpenCV needs just 20 ms per image. C++ is clearly still the faster option.
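As a quick sanity check after building, the compiled module can be imported and timed directly. A minimal sketch, assuming a 1080x1920 screenshot named 0.png pulled from the phone and a crop similar to the one used in the main script below:

import time
import cv2
from fastGetLocation import chessDetect

image = cv2.imread("0.png")  # a game screenshot
start = time.time()
out, x, y, whitex, whitey = chessDetect(image[540:1400])  # crop of the target area
print("chessDetect took %.0f ms" % ((time.time() - start) * 1000))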
Now for the main part:
# encoding=utf-8
import cv2
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from fastGetLocation import chessDetect
import time
import os
import glob


class AutoJumper():
    def __init__(self):
        self.player = cv2.imread("player.jpg")
        if self.player is None:
            print("player.jpg lost, exiting...")
            exit(0)
        self.player_height, self.player_width, _ = self.player.shape
        self.screen_width, self.screen_height = 1080, 1920
        self.player_bias = 40  # subtract the piece width to shrink the search region
        self.BEGIN_Y = 540  # first row of the search region
        self.delayTime = 1000
        self.debug = False
        self.paths = glob.glob(".\\backup\\*.png")
        cv2.namedWindow("Auto_Jump^_^", 0)
        self.count, self.predistance, self.pretime = 0, 0, 0
        data = pd.read_csv("data.csv")
        print("%d pre data loaded!" % len(data))
        if len(data) > 500:
            # Keep only the most recent 500 samples for training.
            data = data[len(data)-500:len(data)]
        reg_X = data['distance'].values.reshape(-1, 1)
        reg_y = data['time']
        self.knnreg = KNeighborsRegressor(n_neighbors=2).fit(reg_X, reg_y)
        # Running parameters
        self.player_x, self.player_y = 0, 0
        self.chess_x, self.chess_y = 0, 0
        self.count = 0
        self.predistance, self.pretime = 0, 0
        self.currdistance, self.currtime = 0, 0
        self.jumpRight = False  # flag: landed exactly on the center

    def get_screenshot(self, id):
        os.system('adb shell screencap -p /sdcard/%s.png' % str(id))
        os.system('adb pull /sdcard/%s.png .' % str(id))

    def makeJump(self):
        press_x = int(320 + np.random.randint(20))
        press_y = int(410 + np.random.randint(20))
        cmd = 'adb shell input swipe %d %d %d %d ' % (press_x, press_y, press_x, press_y) + str(self.currtime)
        os.system(cmd)

    def detectPlayer(self):
        res1 = cv2.matchTemplate(self.image, self.player, cv2.TM_CCOEFF_NORMED)
        min_val1, max_val1, min_loc1, max_loc1 = cv2.minMaxLoc(res1)
        top_left = max_loc1
        # Bottom center of the matched template marks where the piece stands.
        bottom_right = (top_left[0] + self.player_width//2, top_left[1] + self.player_height)
        cv2.circle(self.image, bottom_right, 10, 255, 10)
        self.player_x, self.player_y = bottom_right

    def detectChess(self):
        # The next block is always on the opposite side of the piece.
        if self.player_x >= self.screen_width/2:
            startx, endx, starty, endy = 0, max(self.player_x-self.player_bias, 10), self.BEGIN_Y, self.player_y
        else:
            startx, endx, starty, endy = self.player_x+self.player_bias, self.screen_width, self.BEGIN_Y, self.player_y
        out, x, y, whitex, whitey = chessDetect(self.image[starty:endy, startx:endx])
        cv2.rectangle(self.image, (startx, starty), (endx, endy), 255, 10)
        cv2.circle(self.image, (whitex+startx, whitey+starty), 20, (0, 255, 0), 10)
        cv2.circle(self.image, (x+startx, y+starty), 10, (0, 0, 255), 10)
        # if self.count % 5 != 0:
        #     y = self.player_y - abs(x-self.player_x)*1.732/3
        if abs(x-whitex) + abs(y-whitey) < 30:
            # The white marker agrees with the scan result: trust the marker.
            x = whitex
            y = whitey
            self.jumpRight = True
        self.chess_x, self.chess_y = x+startx, y+starty

    def calDistanceAndTime(self):
        self.currdistance = np.sqrt((self.chess_x-self.player_x)**2+(self.chess_y-self.player_y)**2)
        # sklearn expects a 2D array of samples.
        self.currtime = int(self.knnreg.predict([[self.currdistance]])[0])

    def showImage(self):
        cv2.imshow("Auto_Jump^_^", self.image)
        if cv2.waitKey(self.delayTime) & 0xFF == 27:
            print("Esc key pressed, exiting")
            exit(0)

    def parameterUpdate(self):
        self.count += 1
        self.predistance, self.pretime = self.currdistance, self.currtime
        if self.jumpRight:
            f = open("data.csv", 'a')
            print("Writing log: (%f, %d)" % (self.predistance, self.pretime))
            f.write("%f,%d\n" % (self.predistance, self.pretime))
            f.close()
            self.jumpRight = False

    def jump(self):
        t = time.time()
        self.get_screenshot(0)
        if self.debug:
            self.image = cv2.imread(self.paths[self.count])
            self.delayTime = 0
        else:
            self.image = cv2.imread("0.png")
        self.detectPlayer()
        self.detectChess()
        self.calDistanceAndTime()
        self.makeJump()
        self.showImage()
        self.parameterUpdate()
        print("\nStep %d:" % self.count, time.time()-t)


if __name__ == '__main__':
    jumper = AutoJumper()
    while True:
        jumper.jump()
The main code is much like other authors' versions, so I didn't write many comments. A KNN regressor maps distance to press time, and once enough data has been collected only the most recent 500 samples are used for training, so in theory the jumper has some capacity to improve itself.
Distance-Time Model
Using data from my own phone (a Xiaomi Mi Note standard edition), I plotted the time-distance graph below, with pixel distance on the horizontal axis and press time on the vertical axis.
The plot shows that time is roughly linear in distance but flattens out at both ends. Moreover, because long-distance samples are scarce, there is too little data to estimate press times for far targets well, so the jumper cannot always land on the center.
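To reproduce this plot from your own log, a minimal sketch using matplotlib, assuming the data.csv schema with distance and time columns used by the script above:

import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("data.csv")
plt.scatter(data['distance'], data['time'], s=8)  # one point per perfect jump
plt.xlabel("pixel distance")
plt.ylabel("press time (ms)")
plt.show()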
Limitations
I had always wanted an algorithm that lands on the exact center every time, but that turned out to be quite hard. The best score this algorithm has reached so far is 1538.
It averages about 6 points per jump (one useful metric for comparing these bots), still far from my ideal of 32. Detection is fairly accurate for cuboid blocks but noticeably worse for cylinders, even with the special-case handling described above. The distance-to-time mapping also leaves room for improvement. The biggest takeaway from implementing this was learning to use Cython; it convinced me that a Python+C skill set is a good combination, and that is the direction I plan to develop next.