利用VS2019+MKL加速矩阵运算。
软件版本
Intel OneAP 2022.0.2
Visual Studio 2019 Community
工程配置
-
新建空工程
-
打开OneAPI选项
- 加入代码(官方示例)
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include "mkl.h"
void print_arr(int N, char* name, double* array);
void init_arr(int N, double* a);
void Dgemm_multiply(double* a, double* b, double* c, int N);
int main(int argc, char* argv[])
{
clock_t start, stop;
int i, j;
int N;
double* a;
double* b;
double* c;
if (argc < 2)
{
printf("Enter matrix size N=");
//please enter small number first to ensure that the
//multiplication is correct! and then you may enter
//a "reasonably" large number say like 500 or even 1000
scanf("%d", &N);
}
else
{
N = atoi(argv[1]);
}
a = (double*)malloc(sizeof(double) * N * N);
b = (double*)malloc(sizeof(double) * N * N);
c = (double*)malloc(sizeof(double) * N * N);
init_arr(N, a);
init_arr(N, b);
//DGEMM Multiply
//reallocate to force cash to be flushed
a = (double*)malloc(sizeof(double) * N * N);
b = (double*)malloc(sizeof(double) * N * N);
c = (double*)malloc(sizeof(double) * N * N);
init_arr(N, a);
init_arr(N, b);
start = clock();
//for(i=0;i<1000;i++)
Dgemm_multiply(a, b, c, N);
stop = clock();
printf("Dgemm_multiply(). Elapsed time = %g seconds\n",
((double)(stop - start)) / CLOCKS_PER_SEC);
//print simple test case of data to be sure multiplication is correct
if (N < 7) {
print_arr(N, "a", a);
print_arr(N, "b", b);
print_arr(N, "c", c);
}
free(a);
free(b);
free(c);
return 0;
}
//DGEMM way. The PREFERED way, especially for large matrices
void Dgemm_multiply(double* a, double* b, double* c, int N)
{
double alpha = 1.0, beta = 0.;
int incx = 1;
int incy = N;
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha, b, N, a, N, beta, c, N);
}
//initialize array with random data
void init_arr(int N, double* a)
{
int i, j;
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
a[i * N + j] = (i + j + 1) % 10; //keep all entries less than 10. pleasing to the eye!
}
}
}
//print array to std out
void print_arr(int N, char* name, double* array)
{
int i, j;
printf("\n%s\n", name);
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf("%g\t", array[N * i + j]);
}
printf("\n");
}
}
运行和调试
-
偶然出现的奇怪问题,在设置OneAPI方式为Sequential的时候,程序一直可以运行,但是OneAPI设置为Parallel后,输入的N只能是小于15(多次实测),不然就会报错!
解决方法,在debug出来的exe文件夹下放入libiomp5md.dll
点亮“在看”
欢迎关注、收藏、转载哈!
最有意思的GNSS NEWS…不定期更新…