最近有个需求,需要对比screenshots的变化,也就是简单的图像对比。
最简单的思路就是先把彩色图像转换成单通道的灰度图像,然后逐个像素点对比是否有不一致的灰度值。
其中sf_db2.h是对nanodbc库的封装,前面文章中有概述。
http_util是对cpp-httplib的封装,前文也有概述。
文件比较大的时候可能下载失败需要重试。使用了一个decorator类。
opencv本身的实现就比较简单了,就是简单的读取,然后调函数。
速度一般般,一张图60ms左右吧。
所以使用了boost::asio::thread_pool线程池加速。
这里还可以改进的地方就是,其实元素分批,起线程池,统计失败结果这些都可以封装成一个task_manager类,这样以后还可以用。后面再做吧。
程序目录结构如下,
image.png
代码如下,
CMakeLists.txt
# Build script for the screenshot comparison tests.
# The ZLIB::ZLIB imported target used below needs CMake 3.1 or newer.
cmake_minimum_required(VERSION 3.1)
project(hello_world)

# Request C++14 through CMake instead of smuggling -std through add_definitions
# (which is meant for -D preprocessor definitions). Extensions are switched off
# so the compiler still receives -std=c++14 rather than -std=gnu++14.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
add_compile_options(-g)

# ZLIB is linked unconditionally below, so fail at configure time when missing.
find_package(ZLIB REQUIRED)
find_package(OpenCV REQUIRED)
find_package(Boost REQUIRED COMPONENTS
  system
  filesystem
  serialization
  program_options
  thread
)

include_directories(${Boost_INCLUDE_DIRS} /usr/local/include /usr/local/iODBC/include /opt/snowflake/snowflakeodbc/include/ ${CMAKE_CURRENT_SOURCE_DIR}/../../)
link_directories(/usr/local/lib /usr/local/iODBC/lib /opt/snowflake/snowflakeodbc/lib/universal)

# Sources of this directory plus the shared impl/, http/ and img_util/ code.
file(GLOB APP_SOURCES
  ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/*.h
  ${CMAKE_CURRENT_SOURCE_DIR}/../impl/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../../http/impl/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../../img_util/impl/*.cpp
)

# Every source whose relative path contains "test.cpp" becomes an executable
# named after that path (without ".cpp") and is built from all collected sources.
foreach(sourcefile ${APP_SOURCES})
  file(RELATIVE_PATH filename ${CMAKE_CURRENT_SOURCE_DIR} ${sourcefile})
  string(FIND "${filename}" "test.cpp" TEMP)
  if(NOT "${TEMP}" STREQUAL "-1")
    string(REPLACE ".cpp" "" file ${filename})
    add_executable(${file} ${APP_SOURCES})
    target_link_libraries(${file} ${Boost_LIBRARIES} ZLIB::ZLIB ${OpenCV_LIBS})
    target_link_libraries(${file} ssl crypto libgtest.a libgtest_main.a pystring libgmock.a iodbc iodbcinst libnanodbc.a pthread)
  endif()
endforeach()
images_.h
#ifndef _FREDRIC_IMAGES_H_
#define _FREDRIC_IMAGES_H_
#include "images_timeline/screenshots.h"
// Runs the screenshot-change query against the database and returns one
// `screenshots` record per result row.
std::vector<screenshots> get_screenshots_from_db();
// Downloads every old/new screenshot image of the queried records.
// Returns true when no download failed.
bool test_all_images_can_be_accessed();
// Compares the downloaded old/new images of every queried record against the
// record's meta value. Returns true when no record mismatched.
bool test_meta_is_correct();
#endif
images_.cpp
#include "images_timeline/images_.h"
#include "http/http_util.h"
#include "images_timeline/decorator.hpp"
#include "img_util/img_util.h"
#include "json/json.hpp"
#include "pystring/pystring.h"
#include "sf_db2/sf_db2.h"
#include <boost/asio/post.hpp>
#include <boost/asio/thread_pool.hpp>
#include <algorithm>
#include <functional>
// Shorthand for the nlohmann JSON value type used by the parsers below.
using json = nlohmann::json;
// Number of screenshots records placed into one batch (one pool task per batch).
const int BatchSize = 2;
// Connection string handed to sf_connection.
// NOTE(review): {YOUR_SF_PASSWORD} looks like a placeholder to fill in before running - confirm.
const std::string ConnStr = "dsn=product_odbc;pwd={YOUR_SF_PASSWORD}";
// Directory that downloaded images are written to and later read from.
const std::string ImagePath = "../images";
// Host the screenshot images are downloaded from.
const std::string CdnHost = "static-s.aa-cdn.net";
/**
 * Parse a screenshot JSON document into a map from key to image URL paths.
 *
 * The document is iterated as an object whose values are arrays of strings,
 * for example:
 *   {"default":
 *    ["gp/20600013289355/OpozImyAlqDxklfG2v3MSHpUfWxeCUIhz2nqJf_g9knQU2cd9o4vY7OSSUnM7ElzBDyI"]}
 *
 * @param value JSON text to parse
 * @return one map entry per top-level key, holding that key's strings in order
 *
 * Nothing thrown by json::parse() or get<std::string>() is caught here.
 */
std::map<std::string, std::vector<std::string>> parse_screenshot_val(
    const std::string& value) {
    std::map<std::string, std::vector<std::string>> ret_val;
    const auto json_value = json::parse(value);
    for (auto image_it = json_value.begin(); image_it != json_value.end();
         ++image_it) {
        std::vector<std::string> images;
        // Iterate by const reference: taking each element by value would
        // deep-copy the JSON node just to read one string out of it.
        for (const auto& image : image_it.value()) {
            images.emplace_back(image.get<std::string>());
        }
        ret_val[image_it.key()] = std::move(images);
    }
    // Plain return: wrapping a local in std::move would disable copy elision.
    return ret_val;
}
/**
 * Parse a meta JSON document into a flat string-to-string map.
 *
 * @param value JSON text to parse; every top-level value is read as a string
 * @return one map entry per top-level key
 *
 * Nothing thrown by json::parse() or get<std::string>() is caught here.
 */
std::map<std::string, std::string> parse_meta_val(const std::string& value) {
    std::map<std::string, std::string> ret_val;
    const auto meta_value = json::parse(value);
    for (auto meta_it = meta_value.begin(); meta_it != meta_value.end();
         ++meta_it) {
        // Assign straight from the iterator; no intermediate key/value copies.
        ret_val[meta_it.key()] = meta_it.value().get<std::string>();
    }
    // Plain return: wrapping a local in std::move would disable copy elision.
    return ret_val;
}
std::vector<screenshots> get_screenshots_from_db() {
auto conn_str = ConnStr;
auto raw_query =
R"(select product_key, old_value, new_value, meta, change_time
from AA_INTELLIGENCE_PRODUCTION.ADL_MASTER.dim_localized_event_service_v1_cluster_by_product_key
where market_code='apple-store'
and event_type_name='screenshot_change'
and meta is not null order by change_time desc limit 10;)";
sf_connection sf{conn_str};
auto res = sf.exec_raw_query(raw_query);
int ele_size = res.affected_rows();
const auto columns = res.columns();
std::vector<screenshots> res_eles{};
const std::string null_value = "null";
while (res.next()) {
auto const product_id_ = res.get<std::string>(0, null_value);
auto const old_json_str = res.get<std::string>(1, null_value);
auto const new_json_str = res.get<std::string>(2, null_value);
auto const meta_str = res.get<std::string>(3, null_value);
auto const change_time = res.get<std::string>(4, null_value);
auto old_value = parse_screenshot_val(old_json_str);
auto new_value = parse_screenshot_val(new_json_str);
auto meta_value = parse_meta_val(meta_str);
screenshots screenshots_{product_id_, old_value, new_value, meta_value,
change_time};
res_eles.emplace_back(std::move(screenshots_));
}
return std::move(res_eles);
}
/**
 * Download one image from "/img/<path>" on the given host into ImagePath.
 *
 * @param host host to download from
 * @param path image path below "/img/"
 * @return the result reported by HttpUtil::get_file
 */
bool test_a_image(const std::string& host, const std::string& path) {
    // The URL path with "/" replaced by "_" doubles as the local file name,
    // so no UUID has to be generated.
    const std::string file_stem = pystring::replace(path, "/", "_");
    const std::string local_file = ImagePath + "/" + file_stem + ".png";
    const std::string remote_path = "/img/" + path;
    return HttpUtil::get_file(host, remote_path, local_file);
}
void divide_screenshots_to_batches(
const std::vector<screenshots>& screenshots_,
std::vector<std::vector<screenshots>>& sub_eles,
const std::size_t batches) {
for (int i = 0; i < batches; ++i) {
std::vector<screenshots> sub_ele{};
if (i + 1 < batches) {
for (int j = 0; j < BatchSize; ++j) {
sub_ele.emplace_back(
std::move(screenshots_[i * BatchSize + j]));
}
} else {
for (int j = 0; j < screenshots_.size() % BatchSize; ++j) {
sub_ele.emplace_back(
std::move(screenshots_[i * BatchSize + j]));
}
}
sub_eles.emplace_back(std::move(sub_ele));
}
}
using task_function_type = std::function<void(const std::vector<screenshots>&, int&)>;
void start_thread_pool_and_run_tasks(
const task_function_type& func,
const std::vector<std::vector<screenshots>>& sub_eles,
const std::size_t batches, std::vector<int>& compared_failed_nums) {
// 使用asio thread_pool启动线程池,运行子任务
boost::asio::thread_pool pool{batches};
for (int i = 0; i < batches; ++i) {
compared_failed_nums.push_back(0);
}
for (int i = 0; i < sub_eles.size(); ++i) {
boost::asio::post(pool, std::bind(func, std::ref(sub_eles[i]),
std::ref(compared_failed_nums[i])));
}
pool.join();
}
/**
 * Add up the per-batch failure counters.
 *
 * @param compared_failed_nums one failure count per batch
 * @return the sum of all counts, 0 for an empty vector
 */
int collect_failed_count(const std::vector<int>& compared_failed_nums) {
    int sum = 0;
    std::for_each(compared_failed_nums.begin(), compared_failed_nums.end(),
                  [&sum](int per_batch) { sum += per_batch; });
    return sum;
}
// Alias so the signature below can deduce the wrapped function type.
template <typename T>
using decorator_type = decorator<T>;
/**
 * Run the download check for every image path stored in one screenshot value.
 *
 * @param image_test_func callable invoked as image_test_func(CdnHost, path)
 * @param screenshot_values map whose mapped vectors hold the image paths
 * @param cant_be_accessed_count incremented once per path whose check failed
 */
template <typename T>
void test_one_screenshots_value(decorator_type<T> image_test_func,
                                const screenshots_value_type& screenshot_values,
                                int& cant_be_accessed_count) {
    for (const auto& device_entry : screenshot_values) {
        for (const auto& image_path : device_entry.second) {
            if (image_test_func(CdnHost, image_path)) {
                continue;
            }
            std::cerr << "Download [ https://" << CdnHost << "/img/"
                      << image_path << "] failed" << std::endl;
            ++cant_be_accessed_count;
        }
    }
}
/**
 * Download the old and new images of every record in a batch.
 *
 * test_a_image is wrapped with make_decorator before it is used.
 *
 * @param screenshots_ the batch of records
 * @param cant_be_accessed_count incremented once per image that failed
 */
void download_a_batch(const std::vector<screenshots>& screenshots_,
                      int& cant_be_accessed_count) {
    auto retrying_download = make_decorator(test_a_image);
    for (const auto& record : screenshots_) {
        test_one_screenshots_value(retrying_download, record.old_value,
                                   cant_be_accessed_count);
        test_one_screenshots_value(retrying_download, record.new_value,
                                   cant_be_accessed_count);
    }
}
/**
 * Fetch the records from the database, download all of their images in
 * batches on a thread pool and report whether every download succeeded.
 *
 * @return true when the total number of failed downloads is zero
 */
bool test_all_images_can_be_accessed() {
    const auto all_records = get_screenshots_from_db();
    std::cout << "Total screenshots element count: " << all_records.size()
              << std::endl;
    // Split into batches of BatchSize records, stored as sub-vectors.
    const std::size_t batch_count = all_records.size() / BatchSize + 1;
    std::vector<std::vector<screenshots>> batched_records;
    divide_screenshots_to_batches(all_records, batched_records, batch_count);
    std::vector<int> failures_per_batch{};
    start_thread_pool_and_run_tasks(download_a_batch, batched_records,
                                    batch_count, failures_per_batch);
    const int failed_total = collect_failed_count(failures_per_batch);
    std::cout << "Total failed nums: [" << failed_total << "]" << std::endl;
    return failed_total == 0;
}
/**
 * Check one record's meta values against the actual image differences.
 *
 * For every meta entry (key prefix -> expected flags) and every old_value key
 * starting with that prefix, the old and new images are compared pairwise up
 * to the shorter of the two lists. Each pair contributes "0" when the images
 * are equal and "1" otherwise. The expected flags must start with the
 * resulting string.
 *
 * @param screenshots_ the record to verify
 * @return false as soon as one key's flags do not match, true otherwise
 */
bool compare_a_screenshot_list(const screenshots& screenshots_) {
    const auto& old_by_key = screenshots_.old_value;
    const auto& new_by_key = screenshots_.new_value;
    // Stand-in for keys that are missing from new_value.
    const std::vector<std::string> no_images{};

    for (const auto& expectation : screenshots_.meta) {
        const std::string& key_prefix = expectation.first;
        // The meta value to compare against.
        const std::string& expected_flags = expectation.second;

        for (const auto& old_entry : old_by_key) {
            const std::string& key = old_entry.first;
            // Only keys matching the meta key prefix take part.
            if (!pystring::startswith(key, key_prefix)) {
                continue;
            }
            const auto& old_urls = old_entry.second;
            const auto new_pos = new_by_key.find(key);
            const auto& new_urls =
                new_pos == new_by_key.end() ? no_images : new_pos->second;
            const std::size_t pair_count =
                std::min(old_urls.size(), new_urls.size());

            // Flags actually observed for this key.
            std::string actual_flags{};
            for (std::size_t i = 0; i < pair_count; ++i) {
                const auto old_img_path =
                    ImagePath + "/" +
                    pystring::replace(old_urls[i], "/", "_") + ".png";
                const auto new_img_path =
                    ImagePath + "/" +
                    pystring::replace(new_urls[i], "/", "_") + ".png";
                // Equal images mean "no change" and contribute a zero.
                actual_flags +=
                    ImageUtil::compare_equal(old_img_path, new_img_path) ? "0"
                                                                         : "1";
            }
            if (!pystring::startswith(expected_flags, actual_flags)) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Verify every record of a batch with compare_a_screenshot_list.
 *
 * @param screenshots_ the batch of records
 * @param compare_failed_count incremented once per record that failed; the
 *                             failing record is also written to std::cerr
 */
void compare_a_batch(const std::vector<screenshots>& screenshots_,
                     int& compare_failed_count) {
    for (const auto& record : screenshots_) {
        if (compare_a_screenshot_list(record)) {
            continue;
        }
        std::cerr << record;
        ++compare_failed_count;
    }
}
/**
 * Fetch the records from the database, compare their images in batches on a
 * thread pool and report whether every record matched its meta value.
 *
 * @return true when the total number of mismatching records is zero
 */
bool test_meta_is_correct() {
    const auto all_records = get_screenshots_from_db();
    std::cout << "Total screenshots element count: " << all_records.size()
              << std::endl;
    // Split into batches of BatchSize records, stored as sub-vectors.
    const std::size_t batch_count = all_records.size() / BatchSize + 1;
    std::vector<std::vector<screenshots>> batched_records;
    divide_screenshots_to_batches(all_records, batched_records, batch_count);
    std::vector<int> failures_per_batch{};
    start_thread_pool_and_run_tasks(compare_a_batch, batched_records,
                                    batch_count, failures_per_batch);
    const int failed_total = collect_failed_count(failures_per_batch);
    std::cout << "Total failed nums: [" << failed_total << "]" << std::endl;
    return failed_total == 0;
}
screenshots.h
#ifndef _FREDRIC_SCREENSHOTS_H_
#define _FREDRIC_SCREENSHOTS_H_
#include <iostream>
#include <map>
#include <string>
#include <vector>
/// Maps a key to the ordered list of image URL paths stored under it.
using screenshots_value_type = std::map<std::string, std::vector<std::string>>;

/// One screenshot-change record. Member order matters: the record is
/// brace-initialised positionally.
struct screenshots {
    std::string product_key;
    /// Screenshot lists before the change.
    screenshots_value_type old_value;
    /// Screenshot lists after the change.
    screenshots_value_type new_value;
    /// Key prefix -> expected per-image change flags.
    std::map<std::string, std::string> meta;
    std::string change_time;

    /// Streams the record's product key, old/new values and meta.
    friend std::ostream& operator<<(std::ostream& os,
                                    const screenshots& screenshots_);
};
#endif
screenshots.cpp
#include "images_timeline/screenshots.h"
std::ostream& print_screenshot_value(
std::ostream& os, const screenshots_value_type& screenshots_value) {
for (auto&& map_ele : screenshots_value) {
os << map_ele.first << " ";
auto urls = map_ele.second;
for (auto&& url : urls) {
os << url << " ";
}
os << std::endl;
}
return os;
}
/**
 * Stream a record as a failure report: product key, old value, new value and
 * meta entries.
 *
 * @param os stream to write to
 * @param screenshot_ the record to print
 * @return os
 */
std::ostream& operator<<(std::ostream& os, const screenshots& screenshot_) {
    // End the line after the key; otherwise it runs straight into the
    // "Old value: " label.
    os << "Compare screenshots failed: product_key: " << screenshot_.product_key
       << std::endl;
    os << "Old value: ";
    print_screenshot_value(os, screenshot_.old_value);
    os << "New value: ";
    print_screenshot_value(os, screenshot_.new_value);
    os << "Meta: ";
    for (const auto& meta_entry : screenshot_.meta) {
        os << meta_entry.first << " " << meta_entry.second << " ";
        os << std::endl;
    }
    return os;
}
decorator.hpp
#ifndef _FREDRIC_DECORATOR_HPP_
#define _FREDRIC_DECORATOR_HPP_
#include <iostream>
#include <functional>
//-------------------------------
// BEGIN decorator implementation
//-------------------------------
template <class> struct decorator;
// Maximum number of times the wrapped callable is invoked per call.
const int RetryCount = 3;
/**
 * Retry wrapper around a callable with signature R(Args...).
 *
 * operator() invokes the callable up to RetryCount times with the same
 * arguments and stops at the first result that converts to true.
 */
template <class R, class... Args>
struct decorator<R(Args...)> {
    decorator(std::function<R(Args...)> f) : f_(f) {}

    /**
     * @param args arguments forwarded unchanged to every attempt
     * @return the first truthy result, otherwise the result of the last attempt
     */
    R operator()(Args... args) {
        // Value-initialised so the final return never reads an indeterminate
        // value, whatever RetryCount is set to.
        R res{};
        for (int attempt = 1; attempt <= RetryCount; ++attempt) {
            res = f_(args...);
            if (res) {
                return res;
            }
            // Announce a retry only when another attempt actually follows.
            if (attempt < RetryCount) {
                std::cout << "Failed, retry..." << std::endl;
            }
        }
        return res;
    }

    std::function<R(Args...)> f_;
};
/**
 * Wrap a plain function pointer in a decorator for the matching signature.
 *
 * @param f function to wrap
 * @return a decorator<R(Args...)> holding f
 */
template <class R, class... Args>
decorator<R(Args...)> make_decorator(R (*f)(Args...)) {
    using signature = R(Args...);
    return decorator<signature>(std::function<signature>(f));
}
#endif
image_test.cpp
#include "images_timeline/images_.h"
#include <gtest/gtest.h>
// The query is limited to 10 rows, so exactly 10 records are expected.
GTEST_TEST(ImagesTests, GetScreenshotsFromDB) {
    // Named `records` so the local does not shadow the `screenshots` type.
    const auto records = get_screenshots_from_db();
    // Unsigned literal: size() is unsigned, which avoids a sign-compare warning.
    ASSERT_EQ(10u, records.size());
}
// Downloads every old/new screenshot of the queried records and expects
// that no download failed.
GTEST_TEST(ImagesTests, AllImagesCanBeAccessed) {
bool ret = test_all_images_can_be_accessed();
ASSERT_TRUE(ret);
}
// Compares the images of every queried record against its meta value and
// expects that no record mismatched. Reads image files from ImagePath, so the
// images must already be on disk.
GTEST_TEST(ImagesTests, MetaIsCorrect) {
bool ret = test_meta_is_correct();
ASSERT_TRUE(ret);
}
img_util.h
#ifndef _FREDRIC_IMG_UTIL_H_
#define _FREDRIC_IMG_UTIL_H_
#include <opencv2/opencv.hpp>
#include <string>
/// Stateless collection of image helpers.
struct ImageUtil {
    /**
     * Tell whether two image files contain the same pixels after conversion
     * to grayscale.
     *
     * Both files are read and decoded on every call, so the call is not cheap.
     *
     * @param src path of the first image file
     * @param dst path of the second image file
     *
     * @return true when both images have the same size and no gray value
     *         differs, false otherwise
     */
    static bool compare_equal(const std::string& src, const std::string& dst);
};
#endif
img_util.cpp
#include "img_util/img_util.h"
bool ImageUtil::compare_equal(const std::string& src, const std::string& dst) {
cv::Mat img1 = cv::imread(src);
cv::Mat img2 = cv::imread(dst);
cv::Mat imgSrc{};
cv::Mat imgDst{};
cv::cvtColor(img1, imgSrc, cv::COLOR_BGR2GRAY);
cv::cvtColor(img2, imgDst, cv::COLOR_BGR2GRAY);
if (imgSrc.size() != imgDst.size()) {
std::cerr << "Image size is not equal.." << std::endl;
return false;
}
cv::Mat result{};
cv::compare(imgSrc, imgDst, result, cv::CMP_NE);
int nz = cv::countNonZero(result);
return nz == 0;
}
sf_db2.h
#ifndef _FREDRIC_SF_DB2_H_
#define _FREDRIC_SF_DB2_H_
#include "nanodbc/convert.h"
#include "nanodbc/nanodbc.h"
#include <any>
#include <exception>
#include <iostream>
#include <string>
using db_result = nanodbc::result;
struct sf_connection {
nanodbc::connection conn_;
sf_connection(const std::string& conn_str) {
conn_ = nanodbc::connection{convert(conn_str)};
}
db_result exec_raw_query(const std::string& raw_query) {
auto res = execute(conn_, NANODBC_TEXT(raw_query));
return std::move(res);
}
template <typename... Params>
db_result exec_prpare_statement(const std::string& pre_stmt,
Params... params) {
nanodbc::statement statement(conn_);
std::cout << pre_stmt << std::endl;
prepare(statement, NANODBC_TEXT(pre_stmt));
int index = 0;
int bind_arr[] = {(bind(statement, index, params), ++index)...};
auto res = execute(statement);
return std::move(res);
}
virtual ~sf_connection() {}
private:
template <typename T>
void bind(nanodbc::statement& stmt, int index, T param) {
std::vector<std::string> v{std::to_string(param)};
stmt.bind_strings(index, v);
}
void bind(nanodbc::statement& stmt, int index, const char* param) {
stmt.bind(index, param);
}
void bind(nanodbc::statement& stmt, int index, std::string param) {
stmt.bind(index, param.c_str());
}
};
#endif
http_util.h
#ifndef _HTTP_UTIL_H_
#define _HTTP_UTIL_H_
#define CPPHTTPLIB_OPENSSL_SUPPORT
#define CPPHTTPLIB_ZLIB_SUPPORT
#include "http/httplib.h"
#include <string>
// Value passed to set_connection_timeout() on every client.
// NOTE(review): presumably seconds - confirm against httplib.
const int ConnectionTimeout = 30;

/// Static helpers for simple HTTP requests.
class HttpUtil {
   public:
    /**
     * GET `path` from the server at `url` and store the body in a file.
     *
     * @param url passed to httplib::Client to select the server
     * @param path request path on that server
     * @param result_name path of the file the response body is written to
     * @return false on a non-200 status or an exception, true otherwise
     */
    static bool get(std::string url, std::string path, std::string result_name);

    /**
     * GET `path` from `host` and write the received data into a file.
     *
     * @param host host to download from
     * @param path request path on that host
     * @param result_name path of the file the download is written to
     * @return true when the request completed with status 200
     */
    static bool get_file(std::string host, std::string path,
                         std::string result_name);

    /**
     * GET `path` from `host` with extra headers and return the body as text.
     *
     * @param host host to send the request to
     * @param path request path on that host
     * @param headers header name/value pairs added to the request
     * @param result_string receives the response body
     * @return false when an exception was caught, true otherwise
     */
    static bool get_str(std::string host, std::string path,
                        const std::map<std::string, std::string> & headers,
                        std::string &result_string);

    /**
     * POST `body` as "application/json" to `path` on `host` and return the
     * response body as text.
     *
     * @param host host to send the request to
     * @param path request path on that host
     * @param headers header name/value pairs added to the request
     * @param body request body
     * @param result_string receives the response body
     * @return false when an exception was caught, true otherwise
     */
    static bool post_and_get_str(std::string host, std::string path,
                                 const std::map<std::string, std::string> & headers,
                                 const std::string& body,
                                 std::string &result_string);
};
#endif
http_util.cpp
#include "http/http_util.h"
#include <iostream>
#include <fstream>
/**
 * GET `path` from the server at `url` and store the response body in a file.
 *
 * @param url passed to httplib::Client to select the server
 * @param path request path on that server
 * @param result_name path of the file the response body is written to
 * @return true when the request returned status 200 and the body was written;
 *         false when the request produced no response, returned another
 *         status, the file could not be written or an exception was caught
 */
bool HttpUtil::get(std::string url, std::string path, std::string result_name) {
    try {
        httplib::Client cli{url};
        cli.set_connection_timeout(ConnectionTimeout);
        auto res = cli.Get(path.c_str());
        // The result holds no response when the request itself failed, so it
        // must be checked before res-> is used.
        if (!res) {
            std::cerr << "Get [" << url << path << "] failed" << std::endl;
            return false;
        }
        if (res->status != 200) {
            std::cerr << "Get [" << url << path << "] failed" << std::endl;
            std::cerr << "Status code : [" << res->status << "]" << "\n"
                      << "Result body : [" << res->body << "]" << std::endl;
            return false;
        }
        std::ofstream os{result_name,
                         std::ios_base::out | std::ios_base::binary};
        os << res->body;
        // Report an unwritable target instead of claiming success.
        if (!os) {
            std::cerr << "Write [" << result_name << "] failed" << std::endl;
            return false;
        }
    } catch (const std::exception& e) {
        std::cerr << "Exception: " << e.what() << std::endl;
        return false;
    }
    return true;
}
/**
 * GET `path` from `host` and write the received data into a file.
 *
 * Port 443 with an SSL client is used when CPPHTTPLIB_OPENSSL_SUPPORT is
 * defined, port 80 with a plain client otherwise.
 *
 * @param host host to download from
 * @param path request path on that host
 * @param result_name path of the file the download is written to
 * @return true when the request completed with status 200 and every chunk was
 *         written; false otherwise
 */
bool HttpUtil::get_file(std::string host, std::string path, std::string result_name) {
    try {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
        auto port = 443;
        httplib::SSLClient cli(host, port);
#else
        auto port = 80;
        httplib::Client cli(host, port);
#endif
        cli.set_connection_timeout(ConnectionTimeout);
        // Binary mode: the payload is image data and must not be translated.
        std::ofstream os{result_name,
                         std::ios_base::out | std::ios_base::binary};
        if (!os) {
            std::cerr << "Open [" << result_name << "] failed" << std::endl;
            return false;
        }
        auto res = cli.Get(path.c_str(),
                           [&](const char* data, size_t data_length) {
                               // Write the chunk directly, without building a
                               // temporary string. A false return stops the
                               // transfer once the file cannot be written.
                               os.write(data, static_cast<std::streamsize>(
                                                  data_length));
                               return os.good();
                           });
        if (!res || res->status != 200) {
            return false;
        }
    } catch (const std::exception& e) {
        std::cerr << "Exception: " << e.what() << std::endl;
        return false;
    }
    return true;
}
/**
 * GET `path` from `host` with extra headers and return the body as text.
 *
 * Port 443 with an SSL client is used when CPPHTTPLIB_OPENSSL_SUPPORT is
 * defined, port 80 with a plain client otherwise. The HTTP status is not
 * inspected.
 *
 * @param host host to send the request to
 * @param path request path on that host
 * @param headers header name/value pairs added to the request
 * @param result_string receives the response body
 * @return true when a response was received; false when the request produced
 *         no response or an exception was caught
 */
bool HttpUtil::get_str(std::string host, std::string path, const std::map<std::string, std::string> & headers, std::string &result_string) {
    try {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
        auto port = 443;
        httplib::SSLClient cli(host, port);
#else
        auto port = 80;
        httplib::Client cli(host, port);
#endif
        cli.set_connection_timeout(ConnectionTimeout);
        httplib::Headers headers_{};
        headers_.insert(headers.begin(), headers.end());
        auto res = cli.Get(path.c_str(), headers_);
        // The result holds no response when the request itself failed, so it
        // must be checked before res-> is used.
        if (!res) {
            std::cerr << "Get [" << host << path << "] failed" << std::endl;
            return false;
        }
        result_string = res->body;
        return true;
    } catch (const std::exception& e) {
        std::cerr << "Exception: " << e.what() << std::endl;
        return false;
    }
}
/**
 * POST `body` as "application/json" to `path` on `host` and return the
 * response body as text.
 *
 * Port 443 with an SSL client is used when CPPHTTPLIB_OPENSSL_SUPPORT is
 * defined, port 80 with a plain client otherwise. The HTTP status is not
 * inspected.
 *
 * @param host host to send the request to
 * @param path request path on that host
 * @param headers header name/value pairs added to the request
 * @param body request body
 * @param result_string receives the response body
 * @return true when a response was received; false when the request produced
 *         no response or an exception was caught
 */
bool HttpUtil::post_and_get_str(std::string host, std::string path, const std::map<std::string, std::string> & headers, const std::string& body, std::string &result_string){
    try {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
        auto port = 443;
        httplib::SSLClient cli(host, port);
#else
        auto port = 80;
        httplib::Client cli(host, port);
#endif
        cli.set_connection_timeout(ConnectionTimeout);
        httplib::Headers headers_{};
        headers_.insert(headers.begin(), headers.end());
        auto res = cli.Post(path.c_str(), headers_, body.c_str(), body.size(), "application/json");
        // The result holds no response when the request itself failed, so it
        // must be checked before res-> is used.
        if (!res) {
            std::cerr << "Post [" << host << path << "] failed" << std::endl;
            return false;
        }
        result_string = res->body;
        return true;
    } catch (const std::exception& e) {
        std::cerr << "Exception: " << e.what() << std::endl;
        return false;
    }
}
程序效果如下,
image.png