编程题目如下:
实现如下功能:
步骤1、提供一个函数,实现生成指定长度范围的字符串,长度为随机的10-60字节(含10和60),每个字符串的内容为小写的'a'-'z'中随机选择。
步骤2、初始化一个有1000万个单元的数组,每个数组单元指向一个步骤1中随机生成的字符串。
步骤3、开始计时1
将步骤2中的1000万个随机字符串(作为key,value是8字节的整数,存放字符串的CRC32哈希值)插入到自己实现的哈希表中(不得用指针指向步骤2中的字符串,哈希表中需要自己存储字符串副本),
结束计时1,记录插入完成所有1000万个单元所需的时间T1。
步骤4、开始计时2
查询数组中1000万个单元对应的字符串在哈希表中的VALUE(不得直接基于CRC32来获取,必须通过哈希表来查找到),
结束计时2,记录完成1000万次查询所需的时间T2。
所有开发人员参加,其他人员可自主决定是否参加,编程语言不限。结果需保证正确,
取T1+T2总和最小的前6名,进入最终排名,性能占比60%,内存资源占用40%,如假定最快10秒完成得60分,则20秒得30分,内存最小占用100M,得40分,则200M得20分,以此类推。
不得使用数据压缩来降低内存消耗。
乔阳准备一台LINUX X86作为对比平台,支持C/C++、JAVA、GO等编译运行环境。
// hashtablepro.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include <iostream>
#include <time.h>
#include <random>
#include <string.h>
#include <thread>
#include <atomic>
#ifdef WIN32
#include "windows.h"
#else
#include <time.h>
#include <sys/time.h>
#endif
#define CRC32_POLYNOMIAL 0xEDB88320
#define N_THREAD 80
#ifdef WIN32
typedef __int64 sint64;
typedef unsigned __int64 uint64;
#else
typedef long long int sint64;
typedef unsigned long long int uint64;
#endif
typedef struct hashnode hnode;
typedef struct hashtable htable;
typedef struct task itask;
struct hashnode
{
char* key;
unsigned int crc;
hnode* next;
};
struct hashtable
{
void** array;
int cap, size;
};
struct task
{
int type;
int index;
int batch;
};
using namespace std;
int g_asize = 10000000;
char** g_array = NULL;
void** g_hnode = NULL;
hashtable* g_ht[1000] = { 0 };
itask* g_tsk[1000] = { 0 };
std::atomic<int> counter(0);
int exit_flag = 0;
/////////////////////////////crc32 module////////////////////////////////////////
unsigned int get_crc32(const char *msg)
{
int crc = 0xFFFFFFFF;
int i, j, len = strlen(msg);
for (i = 0; i < len; i++) {
crc ^= msg[i];
for (j = 0; j < 8; j++) {
if (crc & 0x01)
crc = (crc >> 1) ^ CRC32_POLYNOMIAL;
else
crc = crc >> 1;
}
}
return (unsigned int)(crc ^ 0x7FFFFFFF);
}
/////////////////////////////hashtable module////////////////////////////////////////
hashtable* hashtable_init(int size)
{
hashtable* ht;
int cap = (int)(size * 1.19);
ht = (hashtable*)malloc(sizeof(hashtable) + sizeof(void*) * cap);
ht->array = (void**)(ht + 1);
ht->size = 0;
ht->cap = cap;
for (int i = 0; i < cap; i++)
{
ht->array[i] = NULL;
}
return ht;
}
void hashtable_deinit(hashtable* ht)
{
int i;
hashnode* node;
hashnode* next = NULL;
// to release each bucket
for (i = 0; i < ht->cap; i++)
{
node = (hashnode*)ht->array[i];
while (node)
{
next = node->next;
free(node);
node = next;
}
}
free(ht);
}
hashnode* hashnode_get(const char* key)
{
hashnode* hn;
again:
hn = (hashnode*)malloc(sizeof(hashnode) + strlen(key) + 1);
if (!hn) goto again;
hn->crc = get_crc32(key);
hn->next = NULL;
hn->key = (char*)(hn + 1);
strcpy(hn->key, key);
return hn;
}
int hashtable_insert(hashtable* ht, int i, const char* key)
{
int index;
hashnode* cell = NULL;
//hashnode* hn = hashnode_get(key);
hashnode* hn = (hashnode*)g_hnode[i];
index = hn->crc % ht->cap;
cell = (hashnode*)ht->array[index];
if (!cell)
{
ht->array[index] = hn;
}
else
{
hn->next = cell;
ht->array[index] = hn;
}
ht->size++;
return 0;
}
int hashtable_get(hashtable* ht, const char* key)
{
hashnode* cell = NULL;
int crc = -1;
int index = get_crc32(key) % ht->cap;
cell = (hashnode*)ht->array[index];
while (cell)
{
if (strcmp(key, cell->key) == 0)
{
crc = cell->crc;
break;
}
cell = cell->next;
}
return crc;
}
/////////////////////////////phase module////////////////////////////////////////
char* phase_i_gen_string()
{
int len = rand() % 51 + 10;
int i;
char* str = (char*)malloc(len + 1);
str[len] = '\0';
for (i = 0; i < len; i++)
{
str[i] = rand() % 26 + 'a';
}
return str;
}
int phase_ii_gen_array()
{
int i, count = g_asize;
hashnode* hn;
if (!g_array)
{
g_array = (char**)malloc(sizeof(char*) * count);
g_hnode = (void**)malloc(sizeof(void*) * count);
}
for (i = 0; i < count; i++)
{
g_array[i] = phase_i_gen_string();
g_hnode[i] = (void*)malloc(sizeof(hashnode) + strlen(g_array[i]) + 1);
hn = (hashnode*)g_hnode[i];
{
hn->crc = get_crc32(g_array[i]);
hn->next = NULL;
hn->key = (char*)(hn + 1);
strcpy(hn->key, g_array[i]);
}
// std::cout << g_array[i] << std::endl;
}
return 0;
}
int phase_iii_hashtable_put(hashtable* ht)
{
int i;
for (i = 0; i < g_asize; i++)
{
hashtable_insert(ht, i, g_array[i]);
}
std::cout << "input " << ht->size << " entry.\n";
return ht->size;
}
void hashtable_put_run(int ithread, int index, int size)
{
hashtable* ht;
int last = index + size;
if (ithread == N_THREAD - 1)
{
size = g_asize - index;
last = index + size;
}
ht = hashtable_init(size);
for (int i = index; i < last; i++)
{
hashtable_insert(ht, i, g_array[i]);
}
g_ht[ithread] = ht;
return ;
}
int phase_iii_hashtable_put_ex()
{
int i;
int batch = g_asize / N_THREAD;
if (g_asize % N_THREAD)
{
batch += batch / N_THREAD;
}
std::vector<std::thread> vec_thrd;
for (i = 0; i < N_THREAD; i ++)
{
vec_thrd.push_back(std::thread(hashtable_put_run, i, i * batch, batch));
}
for (std::thread& t : vec_thrd)
{
t.join();
}
return 0;
}
int phase_iii_hashtable_put_max()
{
itask task[N_THREAD];
int batch = g_asize / N_THREAD;
int count = N_THREAD;
if (g_asize % N_THREAD)
{
batch += batch / N_THREAD;
}
for (int i = 0; i < N_THREAD; i++)
{
task[i].type = 0;
task[i].index = i * batch;
task[i].batch = batch;
g_tsk[i] = &task[i];
counter.fetch_add(1);
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
while (counter.fetch_add(0))
{
std::this_thread::sleep_for(std::chrono::microseconds(100));
}
return 0;
}
int phase_iv_hashtable_get(hashtable* ht, char** array, int size, int** crc_arr)
{
int* hc_arr;
int i;
hc_arr = (int*)malloc(sizeof(int) * size);
for (i = 0; i < size; i++)
{
hc_arr[i] = hashtable_get(ht, array[i]);
}
*crc_arr = hc_arr;
return 0;
}
int hashtable_get_run(int ithread, int index, int batch)
{
int last = index + batch;
int crc;
if (ithread == N_THREAD - 1)
{
last = g_asize - index;
}
for (int i = index; i < last; i++)
{
crc = hashtable_get(g_ht[ithread], g_array[i]);
// if (crc_arr[i] != get_crc32(g_array[i]))
// {
// printf("error.\n");
// }
}
return 0;
}
int phase_iv_hashtable_get_max()
{
itask task[N_THREAD];
int batch = g_asize / N_THREAD;
if (g_asize % N_THREAD)
{
batch += batch / N_THREAD;
}
for (int i = 0; i < N_THREAD; i++)
{
task[i].type = 1;
task[i].index = i * batch;
task[i].batch = batch;
g_tsk[i] = &task[i];
counter.fetch_add(1);
}
std::this_thread::sleep_for(std::chrono::milliseconds(100));
while (counter.fetch_add(0))
{
std::this_thread::sleep_for(std::chrono::microseconds(100));
}
return 0;
}
int phase_iv_hashtable_get_ex(int* crc_arr)
{
std::vector<std::thread> vec_thrd;
int batch = g_asize / N_THREAD;
if (g_asize % N_THREAD)
{
batch += batch / N_THREAD;
}
for (int i = 0; i < N_THREAD; i++)
{
vec_thrd.push_back(std::thread(hashtable_get_run, i, i * batch, batch));
}
for (std::thread& t : vec_thrd)
{
t.join();
}
return 0;
}
void hashtable_run(int ithread)
{
itask* task;
while (1)
{
if (exit_flag)
break;
task = g_tsk[ithread];
if (!task)
{
std::this_thread::sleep_for(chrono::microseconds(200));
continue;
}
if (!task->type)
{
hashtable_put_run(ithread, task->index, task->batch);
}
else
{
hashtable_get_run(ithread, task->index, task->batch);
}
g_tsk[ithread] = NULL;
counter.fetch_sub(1);
}
return;
}
uint64 get_tick()
{
#ifdef WIN32
return GetTickCount();
#else
struct timeval tm;
gettimeofday(&tm, NULL);
return tm.tv_sec * 1000 + tm.tv_usec / 1000;
#endif
}
int main()
{
hashtable* htable;
int* crc_vec;
uint64 t1, t2, t3;
char mode;
std::vector<std::thread> vec_thrd;
std::cout << "Ready Go...\n";
std::cout << "run mode, s, m, x.\n";
mode = getchar();
srand((unsigned int)time(NULL));
htable = hashtable_init(g_asize);
crc_vec = (int*)malloc(sizeof(int) * g_asize);
std::cout << "phase ii start.\n";
phase_ii_gen_array();
for (int i= 0; i < N_THREAD; i++)
{
vec_thrd.push_back(std::thread(hashtable_run, i));
}
t1 = get_tick();
std::cout << "phase iii start at " << t1 << ".\n";
if (mode == 'm')
phase_iii_hashtable_put_ex();
else if (mode == 'x')
phase_iii_hashtable_put_max();
else
phase_iii_hashtable_put(htable);
t2 = get_tick();
std::cout << "phase iv start at " << t2 << ".\n";
if (mode == 'm')
phase_iv_hashtable_get_ex(crc_vec);
else if (mode = 'x')
phase_iv_hashtable_get_max();
else
phase_iv_hashtable_get(htable, g_array, g_asize, &crc_vec);
t3 = get_tick();
std::cout << "phase iv finish at " << t3 << ".\n";
std::cout << "put spent " << t2 - t1 << "ms\n";
std::cout << "get spent " << t3 - t2 << "ms.\n";
while (1)
{
char c = getchar();
if (c == 'e')
{
exit_flag = 1;
for (std::thread& t : vec_thrd)
{
t.join();
}
std::cout << "Game Over!\n";
break;
}
continue;
}
return 0;
}
获得第三名,还算过得去。