将 sigmoid 函数应用于 10 亿条数据(分段线性近似方式参考:https://zhuanlan.zhihu.com/p/318423774)。验证结论:向量化版本比标准库(expf)标量版本平均快约 5.47 倍
1.1 绝对误差< 0.019(ABSOLUTE_ERROR = 0.019)
root@sun-PowerEdge-R630:/opt/test# clang++ -O3 -mavx2 -ffast-math -mfma -march=native -o sum sum.cpp
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12882 ms
向量化运算耗时: 2325 ms
加速比: 5.54065x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12772 ms
向量化运算耗时: 2327 ms
加速比: 5.48861x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12720 ms
向量化运算耗时: 2330 ms
加速比: 5.45923x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12707 ms
向量化运算耗时: 2328 ms
加速比: 5.45833x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12706 ms
向量化运算耗时: 2322 ms
加速比: 5.47201x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12695 ms
向量化运算耗时: 2329 ms
加速比: 5.45084x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12713 ms
向量化运算耗时: 2328 ms
加速比: 5.46091x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 13030 ms
向量化运算耗时: 2374 ms
加速比: 5.48863x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12888 ms
向量化运算耗时: 2331 ms
加速比: 5.52896x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12733 ms
向量化运算耗时: 2334 ms
加速比: 5.45544x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12854 ms
向量化运算耗时: 2344 ms
加速比: 5.48379x
通过比: 1
1.2 计算sigmoid函数
#include <immintrin.h>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#define DATA_SIZE 1000000000 // number of floats in each buffer (one billion)
#define ALIGNMENT 32 // byte alignment requested from _mm_malloc for every buffer
#define UNROLL_FACTOR 4 // loop unroll factor: 8-float vectors handled per loop iteration
#define SIMD_STEP (8 * UNROLL_FACTOR) // floats consumed per vector-loop iteration (32)
#define ABSOLUTE_ERROR 0.019f // largest accepted |actual - expect| during verification
// Returns a pseudo-random float assembled directly from an IEEE-754 single
// precision bit pattern:
//   - 23 mantissa bits taken from rand(),
//   - a biased exponent drawn from [1, 254], which excludes zero/denormals
//     (exponent 0) as well as infinities and NaNs (exponent 255),
//   - the sign bit set for roughly one value in ten.
// Every value produced is therefore finite by construction, so no retry loop
// around std::isfinite is needed (such a check is also unreliable when the
// program is built with -ffast-math).
float random_float() {
    static_assert(sizeof(float) == sizeof(std::uint32_t),
                  "float must be a 32-bit IEEE-754 value");

    // Draw the three random numbers in separate statements. Calling rand()
    // twice inside one `a | b` expression leaves the call order unspecified,
    // which would make a seeded sequence differ between compilers.
    const std::uint32_t mantissa = static_cast<std::uint32_t>(rand()) & 0x7FFFFFu;
    const std::uint32_t exponent = static_cast<std::uint32_t>(rand() % 254 + 1);
    const bool negative = (rand() % 10 == 0);

    std::uint32_t bits = mantissa | (exponent << 23);
    if (negative) {
        bits |= 0x80000000u;
    }

    // memcpy is the well-defined way to reinterpret the bits; reading the
    // inactive member of a union is undefined behaviour in C++.
    float value;
    std::memcpy(&value, &bits, sizeof value);
    return value;
}
// Vectorised piecewise-linear approximation of sigmoid for eight packed floats.
// With a = |x| the non-negative branch f(a) is
//   a <  1             : 0.25    * a + 0.5
//   1     <= a < 2.375 : 0.125   * a + 0.625
//   2.375 <= a < 5     : 0.03125 * a + 0.84375
//   otherwise          : 1
// and lanes where x < 0 return 1 - f(a).
__m256 mm256_sigmoid_ps(__m256 x) {
    // Clear the sign bit of every lane to obtain |x|.
    const __m256 sign_cleared = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
    const __m256 magnitude = _mm256_and_ps(x, sign_cleared);
    const __m256 unity = _mm256_set1_ps(1.0f);

    // Evaluate all three line segments for every lane up front.
    const __m256 seg_outer =
        _mm256_fmadd_ps(magnitude, _mm256_set1_ps(0.03125f), _mm256_set1_ps(0.84375f));
    const __m256 seg_middle =
        _mm256_fmadd_ps(magnitude, _mm256_set1_ps(0.125f), _mm256_set1_ps(0.625f));
    const __m256 seg_inner =
        _mm256_fmadd_ps(magnitude, _mm256_set1_ps(0.25f), _mm256_set1_ps(0.5f));

    // Narrowing cascade: start from the saturated value and overwrite with the
    // segment of each successively smaller threshold, so the tightest interval
    // containing |x| is the one that survives. All comparisons are ordered, so
    // a NaN lane matches none of them and keeps the saturated value.
    __m256 positive_half = unity;
    positive_half = _mm256_blendv_ps(
        positive_half, seg_outer,
        _mm256_cmp_ps(magnitude, _mm256_set1_ps(5.0f), _CMP_LT_OQ));
    positive_half = _mm256_blendv_ps(
        positive_half, seg_middle,
        _mm256_cmp_ps(magnitude, _mm256_set1_ps(2.375f), _CMP_LT_OQ));
    positive_half = _mm256_blendv_ps(
        positive_half, seg_inner,
        _mm256_cmp_ps(magnitude, unity, _CMP_LT_OQ));

    // Mirror the lanes whose input is negative: result = 1 - f(|x|).
    const __m256 is_negative = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_LT_OQ);
    const __m256 mirrored = _mm256_sub_ps(unity, positive_half);
    return _mm256_blendv_ps(positive_half, mirrored, is_negative);
}
// Benchmark driver. Fills DATA_SIZE floats with random_float(), computes the
// reference sigmoid 1 / (1 + expf(-x)) one element at a time, computes the
// piecewise approximation with mm256_sigmoid_ps() SIMD_STEP floats at a time,
// then prints both wall-clock times, their ratio, and the share of elements
// whose absolute difference stays within ABSOLUTE_ERROR.
// Returns 0 after a completed run (even if some elements exceed the
// tolerance) and EXIT_FAILURE when a buffer cannot be allocated.
int main() {
    // The vector loop has no scalar tail, so the element count must be an
    // exact multiple of the number of floats consumed per iteration.
    static_assert(DATA_SIZE % SIMD_STEP == 0,
                  "DATA_SIZE must be a multiple of SIMD_STEP");
    constexpr std::size_t kCount = DATA_SIZE;
    constexpr std::size_t kBytes = kCount * sizeof(float);

    // 1. Allocate the aligned buffers (required by the aligned load/store
    //    intrinsics used below).
    float* input = static_cast<float*>(_mm_malloc(kBytes, ALIGNMENT));
    float* output_scalar = static_cast<float*>(_mm_malloc(kBytes, ALIGNMENT));
    float* output_vector = static_cast<float*>(_mm_malloc(kBytes, ALIGNMENT));
    if (input == nullptr || output_scalar == nullptr || output_vector == nullptr) {
        // Three buffers of kBytes each are requested; fail cleanly instead of
        // dereferencing a null pointer.
        std::cerr << "内存分配失败\n";
        _mm_free(input);
        _mm_free(output_scalar);
        _mm_free(output_vector);
        return EXIT_FAILURE;
    }

    // 2. Initialise the input data.
    srand(static_cast<unsigned int>(std::time(nullptr)));
    for (std::size_t i = 0; i < kCount; ++i) {
        input[i] = random_float();
    }

    // ================== Scalar version ==================
    auto start_scalar = std::chrono::high_resolution_clock::now();
    for (std::size_t i = 0; i < kCount; ++i) {
        output_scalar[i] = 1.0f / (1.0f + expf(-input[i]));
    }
    auto end_scalar = std::chrono::high_resolution_clock::now();
    double scalar_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(end_scalar - start_scalar).count();

    // ================== Vectorised version ==================
    auto start_vector = std::chrono::high_resolution_clock::now();
    for (std::size_t i = 0; i < kCount; i += SIMD_STEP) {
        // Prefetch hints for the starts of the next two SIMD_STEP-float groups.
        // (Original note: the cache line is 64 bytes on x86-64, i.e. 16 floats
        // per hint; check with `getconf LEVEL1_DCACHE_LINESIZE`.)
        // The guard keeps both addresses inside the buffer or exactly one past
        // its end, so no out-of-range pointer is ever formed.
        if (i + 2 * SIMD_STEP <= kCount) {
            _mm_prefetch(reinterpret_cast<const char*>(input + i + SIMD_STEP), _MM_HINT_T0);
            _mm_prefetch(reinterpret_cast<const char*>(input + i + 2 * SIMD_STEP), _MM_HINT_T0);
        }

        // Load four aligned vectors of eight floats.
        __m256 x0 = _mm256_load_ps(input + i);
        __m256 x1 = _mm256_load_ps(input + i + 8);
        __m256 x2 = _mm256_load_ps(input + i + 16);
        __m256 x3 = _mm256_load_ps(input + i + 24);

        // Vectorised computation.
        __m256 res0 = mm256_sigmoid_ps(x0);
        __m256 res1 = mm256_sigmoid_ps(x1);
        __m256 res2 = mm256_sigmoid_ps(x2);
        __m256 res3 = mm256_sigmoid_ps(x3);

        // Store the results.
        _mm256_store_ps(output_vector + i, res0);
        _mm256_store_ps(output_vector + i + 8, res1);
        _mm256_store_ps(output_vector + i + 16, res2);
        _mm256_store_ps(output_vector + i + 24, res3);
    }
    auto end_vector = std::chrono::high_resolution_clock::now();
    double vector_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(end_vector - start_vector).count();

    // ================== Verification ==================
    std::size_t mismatches = 0;
    for (std::size_t i = 0; i < kCount; ++i) {
        float expected = output_scalar[i];
        float actual = output_vector[i];
        // Absolute error between reference and approximation.
        float absolute_error = std::fabs(expected - actual);
        if (absolute_error > ABSOLUTE_ERROR) {
            // Only the first failing element is reported in detail.
            if (mismatches == 0) {
                std::cerr << "验证失败: i=" << i
                          << " x=" << input[i]
                          << " expected=" << expected
                          << " actual=" << actual
                          << " 绝对误差=" << absolute_error << "\n";
            }
            ++mismatches;
        }
    }

    // ================== Performance report ==================
    std::cout << "===== 性能报告 =====" << "\n";
    std::cout << "数据量: " << DATA_SIZE/1e6 << " Million\n";
    std::cout << "标量运算耗时: " << scalar_time << " ms\n";
    std::cout << "向量化运算耗时: " << vector_time << " ms\n";
    std::cout << "加速比: " << scalar_time / vector_time << "x\n";
    std::cout << "通过比: "
              << static_cast<double>(kCount - mismatches) / static_cast<double>(kCount) << "\n";

    // No cache-line flush is issued for the prefetched addresses: CLFLUSH is
    // permission-checked like a memory load, so flushing addresses past the
    // end of the buffer could fault, and a prefetch hint needs no undoing.

    // Release the buffers.
    _mm_free(input);
    _mm_free(output_scalar);
    _mm_free(output_vector);
    return 0;
}
汇编代码
root@sun-PowerEdge-R630:/opt/test# objdump -d sum -M intel
sum: file format elf64-x86-64
Disassembly of section .init:
0000000000401000 <_init>:
401000: f3 0f 1e fa endbr64
401004: 48 83 ec 08 sub rsp,0x8
401008: 48 8b 05 e9 3f 00 00 mov rax,QWORD PTR [rip+0x3fe9] # 404ff8 <__gmon_start__>
40100f: 48 85 c0 test rax,rax
401012: 74 02 je 401016 <_init+0x16>
401014: ff d0 call rax
401016: 48 83 c4 08 add rsp,0x8
40101a: c3 ret
Disassembly of section .plt:
0000000000401020 <.plt>:
401020: ff 35 e2 3f 00 00 push QWORD PTR [rip+0x3fe2] # 405008 <_GLOBAL_OFFSET_TABLE_+0x8>
401026: ff 25 e4 3f 00 00 jmp QWORD PTR [rip+0x3fe4] # 405010 <_GLOBAL_OFFSET_TABLE_+0x10>
40102c: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
0000000000401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>:
401030: ff 25 e2 3f 00 00 jmp QWORD PTR [rip+0x3fe2] # 405018 <_ZNSt6chrono3_V212system_clock3nowEv@GLIBCXX_3.4.19>
401036: 68 00 00 00 00 push 0x0
40103b: e9 e0 ff ff ff jmp 401020 <.plt>
0000000000401040 <rand@plt>:
401040: ff 25 da 3f 00 00 jmp QWORD PTR [rip+0x3fda] # 405020 <rand@GLIBC_2.2.5>
401046: 68 01 00 00 00 push 0x1
40104b: e9 d0 ff ff ff jmp 401020 <.plt>
0000000000401050 <posix_memalign@plt>:
401050: ff 25 d2 3f 00 00 jmp QWORD PTR [rip+0x3fd2] # 405028 <posix_memalign@GLIBC_2.2.5>
401056: 68 02 00 00 00 push 0x2
40105b: e9 c0 ff ff ff jmp 401020 <.plt>
0000000000401060 <_ZNSo9_M_insertImEERSoT_@plt>:
401060: ff 25 ca 3f 00 00 jmp QWORD PTR [rip+0x3fca] # 405030 <_ZNSo9_M_insertImEERSoT_@GLIBCXX_3.4.9>
401066: 68 03 00 00 00 push 0x3
40106b: e9 b0 ff ff ff jmp 401020 <.plt>
0000000000401070 <__cxa_atexit@plt>:
401070: ff 25 c2 3f 00 00 jmp QWORD PTR [rip+0x3fc2] # 405038 <__cxa_atexit@GLIBC_2.2.5>
401076: 68 04 00 00 00 push 0x4
40107b: e9 a0 ff ff ff jmp 401020 <.plt>
0000000000401080 <time@plt>:
401080: ff 25 ba 3f 00 00 jmp QWORD PTR [rip+0x3fba] # 405040 <time@GLIBC_2.2.5>
401086: 68 05 00 00 00 push 0x5
40108b: e9 90 ff ff ff jmp 401020 <.plt>
0000000000401090 <srand@plt>:
401090: ff 25 b2 3f 00 00 jmp QWORD PTR [rip+0x3fb2] # 405048 <srand@GLIBC_2.2.5>
401096: 68 06 00 00 00 push 0x6
40109b: e9 80 ff ff ff jmp 401020 <.plt>
00000000004010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>:
4010a0: ff 25 aa 3f 00 00 jmp QWORD PTR [rip+0x3faa] # 405050 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@GLIBCXX_3.4.9>
4010a6: 68 07 00 00 00 push 0x7
4010ab: e9 70 ff ff ff jmp 401020 <.plt>
00000000004010b0 <free@plt>:
4010b0: ff 25 a2 3f 00 00 jmp QWORD PTR [rip+0x3fa2] # 405058 <free@GLIBC_2.2.5>
4010b6: 68 08 00 00 00 push 0x8
4010bb: e9 60 ff ff ff jmp 401020 <.plt>
00000000004010c0 <_ZNSt8ios_base4InitC1Ev@plt>:
4010c0: ff 25 9a 3f 00 00 jmp QWORD PTR [rip+0x3f9a] # 405060 <_ZNSt8ios_base4InitC1Ev@GLIBCXX_3.4>
4010c6: 68 09 00 00 00 push 0x9
4010cb: e9 50 ff ff ff jmp 401020 <.plt>
00000000004010d0 <_ZNSo9_M_insertIdEERSoT_@plt>:
4010d0: ff 25 92 3f 00 00 jmp QWORD PTR [rip+0x3f92] # 405068 <_ZNSo9_M_insertIdEERSoT_@GLIBCXX_3.4.9>
4010d6: 68 0a 00 00 00 push 0xa
4010db: e9 40 ff ff ff jmp 401020 <.plt>
00000000004010e0 <expf@plt>:
4010e0: ff 25 8a 3f 00 00 jmp QWORD PTR [rip+0x3f8a] # 405070 <expf@GLIBC_2.27>
4010e6: 68 0b 00 00 00 push 0xb
4010eb: e9 30 ff ff ff jmp 401020 <.plt>
00000000004010f0 <_ZNSt8ios_base4InitD1Ev@plt>:
4010f0: ff 25 82 3f 00 00 jmp QWORD PTR [rip+0x3f82] # 405078 <_ZNSt8ios_base4InitD1Ev@GLIBCXX_3.4>
4010f6: 68 0c 00 00 00 push 0xc
4010fb: e9 20 ff ff ff jmp 401020 <.plt>
Disassembly of section .text:
0000000000401100 <set_fast_math>:
401100: f3 0f 1e fa endbr64
401104: 0f ae 5c 24 fc stmxcsr DWORD PTR [rsp-0x4]
401109: 81 4c 24 fc 40 80 00 or DWORD PTR [rsp-0x4],0x8040
401110: 00
401111: 0f ae 54 24 fc ldmxcsr DWORD PTR [rsp-0x4]
401116: c3 ret
401117: 66 0f 1f 84 00 00 00 nop WORD PTR [rax+rax*1+0x0]
40111e: 00 00
0000000000401120 <_GLOBAL__sub_I_sum.cpp>:
401120: 50 push rax
401121: bf f1 52 40 00 mov edi,0x4052f1
401126: e8 95 ff ff ff call 4010c0 <_ZNSt8ios_base4InitC1Ev@plt>
40112b: bf f0 10 40 00 mov edi,0x4010f0
401130: be f1 52 40 00 mov esi,0x4052f1
401135: ba 88 50 40 00 mov edx,0x405088
40113a: 58 pop rax
40113b: e9 30 ff ff ff jmp 401070 <__cxa_atexit@plt>
0000000000401140 <_start>:
401140: f3 0f 1e fa endbr64
401144: 31 ed xor ebp,ebp
401146: 49 89 d1 mov r9,rdx
401149: 5e pop rsi
40114a: 48 89 e2 mov rdx,rsp
40114d: 48 83 e4 f0 and rsp,0xfffffffffffffff0
401151: 50 push rax
401152: 54 push rsp
401153: 49 c7 c0 80 21 40 00 mov r8,0x402180
40115a: 48 c7 c1 10 21 40 00 mov rcx,0x402110
401161: 48 c7 c7 a0 13 40 00 mov rdi,0x4013a0
401168: ff 15 82 3e 00 00 call QWORD PTR [rip+0x3e82] # 404ff0 <__libc_start_main@GLIBC_2.2.5>
40116e: f4 hlt
40116f: 90 nop
0000000000401170 <_dl_relocate_static_pie>:
401170: f3 0f 1e fa endbr64
401174: c3 ret
401175: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40117c: 00 00 00
40117f: 90 nop
0000000000401180 <deregister_tm_clones>:
401180: b8 90 50 40 00 mov eax,0x405090
401185: 48 3d 90 50 40 00 cmp rax,0x405090
40118b: 74 13 je 4011a0 <deregister_tm_clones+0x20>
40118d: b8 00 00 00 00 mov eax,0x0
401192: 48 85 c0 test rax,rax
401195: 74 09 je 4011a0 <deregister_tm_clones+0x20>
401197: bf 90 50 40 00 mov edi,0x405090
40119c: ff e0 jmp rax
40119e: 66 90 xchg ax,ax
4011a0: c3 ret
4011a1: 66 66 2e 0f 1f 84 00 data16 nop WORD PTR cs:[rax+rax*1+0x0]
4011a8: 00 00 00 00
4011ac: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
00000000004011b0 <register_tm_clones>:
4011b0: be 90 50 40 00 mov esi,0x405090
4011b5: 48 81 ee 90 50 40 00 sub rsi,0x405090
4011bc: 48 89 f0 mov rax,rsi
4011bf: 48 c1 ee 3f shr rsi,0x3f
4011c3: 48 c1 f8 03 sar rax,0x3
4011c7: 48 01 c6 add rsi,rax
4011ca: 48 d1 fe sar rsi,1
4011cd: 74 11 je 4011e0 <register_tm_clones+0x30>
4011cf: b8 00 00 00 00 mov eax,0x0
4011d4: 48 85 c0 test rax,rax
4011d7: 74 07 je 4011e0 <register_tm_clones+0x30>
4011d9: bf 90 50 40 00 mov edi,0x405090
4011de: ff e0 jmp rax
4011e0: c3 ret
4011e1: 66 66 2e 0f 1f 84 00 data16 nop WORD PTR cs:[rax+rax*1+0x0]
4011e8: 00 00 00 00
4011ec: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
00000000004011f0 <__do_global_dtors_aux>:
4011f0: f3 0f 1e fa endbr64
4011f4: 80 3d f5 40 00 00 00 cmp BYTE PTR [rip+0x40f5],0x0 # 4052f0 <completed.8061>
4011fb: 75 13 jne 401210 <__do_global_dtors_aux+0x20>
4011fd: 55 push rbp
4011fe: 48 89 e5 mov rbp,rsp
401201: e8 7a ff ff ff call 401180 <deregister_tm_clones>
401206: c6 05 e3 40 00 00 01 mov BYTE PTR [rip+0x40e3],0x1 # 4052f0 <completed.8061>
40120d: 5d pop rbp
40120e: c3 ret
40120f: 90 nop
401210: c3 ret
401211: 66 66 2e 0f 1f 84 00 data16 nop WORD PTR cs:[rax+rax*1+0x0]
401218: 00 00 00 00
40121c: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
0000000000401220 <frame_dummy>:
401220: f3 0f 1e fa endbr64
401224: eb 8a jmp 4011b0 <register_tm_clones>
401226: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40122d: 00 00 00
0000000000401230 <_Z12random_floatv>:
401230: 53 push rbx
401231: 48 83 ec 10 sub rsp,0x10
401235: c4 e2 79 58 05 c6 1d vpbroadcastd xmm0,DWORD PTR [rip+0x1dc6] # 403004 <_IO_stdin_used+0x4>
40123c: 00 00
40123e: c5 f9 7f 04 24 vmovdqa XMMWORD PTR [rsp],xmm0
401243: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40124a: 00 00 00
40124d: 0f 1f 00 nop DWORD PTR [rax]
401250: e8 eb fd ff ff call 401040 <rand@plt>
401255: 89 c3 mov ebx,eax
401257: 81 e3 ff ff 7f 00 and ebx,0x7fffff
40125d: e8 de fd ff ff call 401040 <rand@plt>
401262: 48 98 cdqe
401264: 48 69 c8 09 04 02 81 imul rcx,rax,0xffffffff81020409
40126b: 48 c1 e9 20 shr rcx,0x20
40126f: 01 c1 add ecx,eax
401271: 89 ca mov edx,ecx
401273: c1 ea 1f shr edx,0x1f
401276: c1 f9 07 sar ecx,0x7
401279: 01 d1 add ecx,edx
40127b: 89 ca mov edx,ecx
40127d: c1 e2 08 shl edx,0x8
401280: 89 ce mov esi,ecx
401282: 29 d6 sub esi,edx
401284: 01 ce add esi,ecx
401286: 01 c6 add esi,eax
401288: c1 e6 17 shl esi,0x17
40128b: 01 f3 add ebx,esi
40128d: 81 c3 00 00 80 00 add ebx,0x800000
401293: e8 a8 fd ff ff call 401040 <rand@plt>
401298: 69 c0 cd cc cc cc imul eax,eax,0xcccccccd
40129e: 05 98 99 99 19 add eax,0x19999998
4012a3: c4 e3 7b f0 c0 01 rorx eax,eax,0x1
4012a9: 89 d9 mov ecx,ebx
4012ab: 81 c9 00 00 00 80 or ecx,0x80000000
4012b1: 3d 99 99 99 19 cmp eax,0x19999999
4012b6: 0f 43 cb cmovae ecx,ebx
4012b9: c5 f9 6e c1 vmovd xmm0,ecx
4012bd: c5 f9 db 0c 24 vpand xmm1,xmm0,XMMWORD PTR [rsp]
4012c2: c5 f8 2e 0d 3e 1d 00 vucomiss xmm1,DWORD PTR [rip+0x1d3e] # 403008 <_IO_stdin_used+0x8>
4012c9: 00
4012ca: 74 84 je 401250 <_Z12random_floatv+0x20>
4012cc: 48 83 c4 10 add rsp,0x10
4012d0: 5b pop rbx
4012d1: c3 ret
4012d2: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
4012d9: 00 00 00
4012dc: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
00000000004012e0 <_Z16mm256_sigmoid_psDv8_f>:
4012e0: c4 e2 7d 18 0d 1b 1d vbroadcastss ymm1,DWORD PTR [rip+0x1d1b] # 403004 <_IO_stdin_used+0x4>
4012e7: 00 00
4012e9: c5 fc 54 c9 vandps ymm1,ymm0,ymm1
4012ed: c4 e2 7d 18 15 16 1d vbroadcastss ymm2,DWORD PTR [rip+0x1d16] # 40300c <_IO_stdin_used+0xc>
4012f4: 00 00
4012f6: c5 ec c2 d9 02 vcmpleps ymm3,ymm2,ymm1
4012fb: c4 e2 7d 18 25 0c 1d vbroadcastss ymm4,DWORD PTR [rip+0x1d0c] # 403010 <_IO_stdin_used+0x10>
401302: 00 00
401304: c5 f4 c2 e4 01 vcmpltps ymm4,ymm1,ymm4
401309: c5 dc 54 db vandps ymm3,ymm4,ymm3
40130d: c4 e2 7d 18 25 fe 1c vbroadcastss ymm4,DWORD PTR [rip+0x1cfe] # 403014 <_IO_stdin_used+0x14>
401314: 00 00
401316: c5 dc c2 e9 02 vcmpleps ymm5,ymm4,ymm1
40131b: c5 f4 c2 d2 01 vcmpltps ymm2,ymm1,ymm2
401320: c4 e2 7d 18 35 ef 1c vbroadcastss ymm6,DWORD PTR [rip+0x1cef] # 403018 <_IO_stdin_used+0x18>
401327: 00 00
401329: c4 e2 7d 18 3d ea 1c vbroadcastss ymm7,DWORD PTR [rip+0x1cea] # 40301c <_IO_stdin_used+0x1c>
401330: 00 00
401332: c4 e2 75 a8 fe vfmadd213ps ymm7,ymm1,ymm6
401337: c5 f4 c2 f4 01 vcmpltps ymm6,ymm1,ymm4
40133c: c4 e3 5d 4a f7 60 vblendvps ymm6,ymm4,ymm7,ymm6
401342: c5 ec 54 d5 vandps ymm2,ymm2,ymm5
401346: c4 e2 7d 18 2d d1 1c vbroadcastss ymm5,DWORD PTR [rip+0x1cd1] # 403020 <_IO_stdin_used+0x20>
40134d: 00 00
40134f: c4 e2 7d 18 3d cc 1c vbroadcastss ymm7,DWORD PTR [rip+0x1ccc] # 403024 <_IO_stdin_used+0x24>
401356: 00 00
401358: c4 e2 75 a8 fd vfmadd213ps ymm7,ymm1,ymm5
40135d: c5 d0 57 ed vxorps xmm5,xmm5,xmm5
401361: c4 e3 4d 4a d7 20 vblendvps ymm2,ymm6,ymm7,ymm2
401367: c4 e2 7d 18 35 b8 1c vbroadcastss ymm6,DWORD PTR [rip+0x1cb8] # 403028 <_IO_stdin_used+0x28>
40136e: 00 00
401370: c4 e2 7d 18 3d b3 1c vbroadcastss ymm7,DWORD PTR [rip+0x1cb3] # 40302c <_IO_stdin_used+0x2c>
401377: 00 00
401379: c4 e2 75 a8 fe vfmadd213ps ymm7,ymm1,ymm6
40137e: c4 e3 6d 4a cf 30 vblendvps ymm1,ymm2,ymm7,ymm3
401384: c5 fc c2 c5 01 vcmpltps ymm0,ymm0,ymm5
401389: c5 dc 5c d1 vsubps ymm2,ymm4,ymm1
40138d: c4 e3 75 4a c2 00 vblendvps ymm0,ymm1,ymm2,ymm0
401393: c3 ret
401394: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
40139b: 00 00 00
40139e: 66 90 xchg ax,ax
00000000004013a0 <main>:
4013a0: 55 push rbp
4013a1: 41 57 push r15
4013a3: 41 56 push r14
4013a5: 41 55 push r13
4013a7: 41 54 push r12
4013a9: 53 push rbx
4013aa: 48 81 ec 88 01 00 00 sub rsp,0x188
4013b1: 48 8d 7c 24 78 lea rdi,[rsp+0x78]
4013b6: be 20 00 00 00 mov esi,0x20
4013bb: ba 00 28 6b ee mov edx,0xee6b2800
4013c0: e8 8b fc ff ff call 401050 <posix_memalign@plt>
4013c5: 45 31 e4 xor r12d,r12d
4013c8: 41 bd 00 00 00 00 mov r13d,0x0
4013ce: 85 c0 test eax,eax
4013d0: 75 05 jne 4013d7 <main+0x37>
4013d2: 4c 8b 6c 24 78 mov r13,QWORD PTR [rsp+0x78]
4013d7: 48 8d 7c 24 78 lea rdi,[rsp+0x78]
4013dc: be 20 00 00 00 mov esi,0x20
4013e1: ba 00 28 6b ee mov edx,0xee6b2800
4013e6: e8 65 fc ff ff call 401050 <posix_memalign@plt>
4013eb: 89 44 24 20 mov DWORD PTR [rsp+0x20],eax
4013ef: 4c 8b 74 24 78 mov r14,QWORD PTR [rsp+0x78]
4013f4: 48 8d 7c 24 78 lea rdi,[rsp+0x78]
4013f9: be 20 00 00 00 mov esi,0x20
4013fe: ba 00 28 6b ee mov edx,0xee6b2800
401403: e8 48 fc ff ff call 401050 <posix_memalign@plt>
401408: 89 c5 mov ebp,eax
40140a: 4c 8b 7c 24 78 mov r15,QWORD PTR [rsp+0x78]
40140f: 31 ff xor edi,edi
401411: e8 6a fc ff ff call 401080 <time@plt>
401416: 89 c7 mov edi,eax
401418: e8 73 fc ff ff call 401090 <srand@plt>
40141d: 0f 1f 00 nop DWORD PTR [rax]
401420: e8 1b fc ff ff call 401040 <rand@plt>
401425: 89 c3 mov ebx,eax
401427: 81 e3 ff ff 7f 00 and ebx,0x7fffff
40142d: e8 0e fc ff ff call 401040 <rand@plt>
401432: 48 98 cdqe
401434: 48 69 c8 09 04 02 81 imul rcx,rax,0xffffffff81020409
40143b: 48 c1 e9 20 shr rcx,0x20
40143f: 01 c1 add ecx,eax
401441: 89 ca mov edx,ecx
401443: c1 ea 1f shr edx,0x1f
401446: c1 f9 07 sar ecx,0x7
401449: 01 d1 add ecx,edx
40144b: 89 ca mov edx,ecx
40144d: c1 e2 08 shl edx,0x8
401450: 89 ce mov esi,ecx
401452: 29 d6 sub esi,edx
401454: 01 ce add esi,ecx
401456: 01 c6 add esi,eax
401458: c1 e6 17 shl esi,0x17
40145b: 01 f3 add ebx,esi
40145d: 81 c3 00 00 80 00 add ebx,0x800000
401463: e8 d8 fb ff ff call 401040 <rand@plt>
401468: 69 c0 cd cc cc cc imul eax,eax,0xcccccccd
40146e: 05 98 99 99 19 add eax,0x19999998
401473: c4 e3 7b f0 c8 01 rorx ecx,eax,0x1
401479: 89 d8 mov eax,ebx
40147b: 0d 00 00 00 80 or eax,0x80000000
401480: 81 f9 99 99 99 19 cmp ecx,0x19999999
401486: 0f 43 c3 cmovae eax,ebx
401489: 89 c1 mov ecx,eax
40148b: 81 e1 ff ff ff 7f and ecx,0x7fffffff
401491: c5 f9 6e c1 vmovd xmm0,ecx
401495: c5 f8 2e 05 6b 1b 00 vucomiss xmm0,DWORD PTR [rip+0x1b6b] # 403008 <_IO_stdin_used+0x8>
40149c: 00
40149d: 74 81 je 401420 <main+0x80>
40149f: 43 89 44 a5 00 mov DWORD PTR [r13+r12*4+0x0],eax
4014a4: 49 ff c4 inc r12
4014a7: 49 81 fc 00 ca 9a 3b cmp r12,0x3b9aca00
4014ae: 0f 85 6c ff ff ff jne 401420 <main+0x80>
4014b4: 31 c0 xor eax,eax
4014b6: 83 7c 24 20 00 cmp DWORD PTR [rsp+0x20],0x0
4014bb: 4c 0f 45 f0 cmovne r14,rax
4014bf: 85 ed test ebp,ebp
4014c1: 4c 0f 45 f8 cmovne r15,rax
4014c5: e8 66 fb ff ff call 401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
4014ca: 49 89 c4 mov r12,rax
4014cd: b8 00 28 6b ee mov eax,0xee6b2800
4014d2: 4a 8d 0c 28 lea rcx,[rax+r13*1]
4014d6: 49 39 ce cmp r14,rcx
4014d9: 0f 83 da 00 00 00 jae 4015b9 <main+0x219>
4014df: 4c 01 f0 add rax,r14
4014e2: 49 39 c5 cmp r13,rax
4014e5: 0f 83 ce 00 00 00 jae 4015b9 <main+0x219>
4014eb: bb 03 00 00 00 mov ebx,0x3
4014f0: c4 e2 79 18 05 37 1b vbroadcastss xmm0,DWORD PTR [rip+0x1b37] # 403030 <_IO_stdin_used+0x30>
4014f7: 00 00
4014f9: c5 f8 29 44 24 20 vmovaps XMMWORD PTR [rsp+0x20],xmm0
4014ff: 90 nop
401500: c4 c1 7a 10 44 9d f4 vmovss xmm0,DWORD PTR [r13+rbx*4-0xc]
401507: c5 f8 57 44 24 20 vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
40150d: e8 ce fb ff ff call 4010e0 <expf@plt>
401512: c5 fa 10 0d fa 1a 00 vmovss xmm1,DWORD PTR [rip+0x1afa] # 403014 <_IO_stdin_used+0x14>
401519: 00
40151a: c5 fa 58 c1 vaddss xmm0,xmm0,xmm1
40151e: c5 f2 5e c0 vdivss xmm0,xmm1,xmm0
401522: c4 c1 7a 11 44 9e f4 vmovss DWORD PTR [r14+rbx*4-0xc],xmm0
401529: c4 c1 7a 10 44 9d f8 vmovss xmm0,DWORD PTR [r13+rbx*4-0x8]
401530: c5 f8 57 44 24 20 vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
401536: e8 a5 fb ff ff call 4010e0 <expf@plt>
40153b: c5 fa 10 0d d1 1a 00 vmovss xmm1,DWORD PTR [rip+0x1ad1] # 403014 <_IO_stdin_used+0x14>
401542: 00
401543: c5 fa 58 c1 vaddss xmm0,xmm0,xmm1
401547: c5 f2 5e c0 vdivss xmm0,xmm1,xmm0
40154b: c4 c1 7a 11 44 9e f8 vmovss DWORD PTR [r14+rbx*4-0x8],xmm0
401552: c4 c1 7a 10 44 9d fc vmovss xmm0,DWORD PTR [r13+rbx*4-0x4]
401559: c5 f8 57 44 24 20 vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
40155f: e8 7c fb ff ff call 4010e0 <expf@plt>
401564: c5 fa 10 0d a8 1a 00 vmovss xmm1,DWORD PTR [rip+0x1aa8] # 403014 <_IO_stdin_used+0x14>
40156b: 00
40156c: c5 fa 58 c1 vaddss xmm0,xmm0,xmm1
401570: c5 f2 5e c0 vdivss xmm0,xmm1,xmm0
401574: c4 c1 7a 11 44 9e fc vmovss DWORD PTR [r14+rbx*4-0x4],xmm0
40157b: c4 c1 7a 10 44 9d 00 vmovss xmm0,DWORD PTR [r13+rbx*4+0x0]
401582: c5 f8 57 44 24 20 vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
401588: e8 53 fb ff ff call 4010e0 <expf@plt>
40158d: c5 fa 10 0d 7f 1a 00 vmovss xmm1,DWORD PTR [rip+0x1a7f] # 403014 <_IO_stdin_used+0x14>
401594: 00
401595: c5 fa 58 c1 vaddss xmm0,xmm0,xmm1
401599: c5 f2 5e c0 vdivss xmm0,xmm1,xmm0
40159d: c4 c1 7a 11 04 9e vmovss DWORD PTR [r14+rbx*4],xmm0
4015a3: 48 83 c3 04 add rbx,0x4
4015a7: 48 81 fb 03 ca 9a 3b cmp rbx,0x3b9aca03
4015ae: 0f 85 4c ff ff ff jne 401500 <main+0x160>
4015b4: e9 9f 04 00 00 jmp 401a58 <main+0x6b8>
4015b9: bb 18 00 00 00 mov ebx,0x18
4015be: c4 e2 7d 18 05 69 1a vbroadcastss ymm0,DWORD PTR [rip+0x1a69] # 403030 <_IO_stdin_used+0x30>
4015c5: 00 00
4015c7: c5 fc 11 84 24 a0 00 vmovups YMMWORD PTR [rsp+0xa0],ymm0
4015ce: 00 00
4015d0: c4 e2 7d 18 05 3b 1a vbroadcastss ymm0,DWORD PTR [rip+0x1a3b] # 403014 <_IO_stdin_used+0x14>
4015d7: 00 00
4015d9: c5 fc 11 84 24 80 00 vmovups YMMWORD PTR [rsp+0x80],ymm0
4015e0: 00 00
4015e2: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
4015e9: 00 00 00
4015ec: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
4015f0: c5 fc 10 84 24 a0 00 vmovups ymm0,YMMWORD PTR [rsp+0xa0]
4015f7: 00 00
4015f9: c4 c1 7c 57 44 9d a0 vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4-0x60]
401600: c5 fc 11 44 24 20 vmovups YMMWORD PTR [rsp+0x20],ymm0
401606: c4 e3 7d 19 c0 01 vextractf128 xmm0,ymm0,0x1
40160c: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
401612: c5 f8 77 vzeroupper
401615: e8 c6 fa ff ff call 4010e0 <expf@plt>
40161a: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
40161f: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
401626: f5
401627: e8 b4 fa ff ff call 4010e0 <expf@plt>
40162c: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401631: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
401637: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
40163c: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
401643: 4e
401644: e8 97 fa ff ff call 4010e0 <expf@plt>
401649: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
40164e: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
401654: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
401659: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
401660: e7
401661: e8 7a fa ff ff call 4010e0 <expf@plt>
401666: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
40166b: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
401671: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
401677: c5 fc 10 44 24 20 vmovups ymm0,YMMWORD PTR [rsp+0x20]
40167d: c5 f8 77 vzeroupper
401680: e8 5b fa ff ff call 4010e0 <expf@plt>
401685: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
40168a: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
401691: f5
401692: e8 49 fa ff ff call 4010e0 <expf@plt>
401697: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
40169c: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
4016a2: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4016a7: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
4016ae: 4e
4016af: e8 2c fa ff ff call 4010e0 <expf@plt>
4016b4: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4016b9: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
4016bf: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4016c4: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
4016cb: e7
4016cc: e8 0f fa ff ff call 4010e0 <expf@plt>
4016d1: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4016d6: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
4016dc: c4 e3 7d 18 44 24 40 vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
4016e3: 01
4016e4: c5 fc 10 94 24 80 00 vmovups ymm2,YMMWORD PTR [rsp+0x80]
4016eb: 00 00
4016ed: c5 fc 58 c2 vaddps ymm0,ymm0,ymm2
4016f1: c5 fc 53 c8 vrcpps ymm1,ymm0
4016f5: c4 e2 75 ac c2 vfnmadd213ps ymm0,ymm1,ymm2
4016fa: c4 e2 75 98 c1 vfmadd132ps ymm0,ymm1,ymm1
4016ff: c4 c1 7c 11 44 9e a0 vmovups YMMWORD PTR [r14+rbx*4-0x60],ymm0
401706: c5 fc 10 84 24 a0 00 vmovups ymm0,YMMWORD PTR [rsp+0xa0]
40170d: 00 00
40170f: c4 c1 7c 57 44 9d c0 vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4-0x40]
401716: c5 fc 11 44 24 20 vmovups YMMWORD PTR [rsp+0x20],ymm0
40171c: c4 e3 7d 19 c0 01 vextractf128 xmm0,ymm0,0x1
401722: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
401728: c5 f8 77 vzeroupper
40172b: e8 b0 f9 ff ff call 4010e0 <expf@plt>
401730: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
401735: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
40173c: f5
40173d: e8 9e f9 ff ff call 4010e0 <expf@plt>
401742: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401747: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
40174d: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
401752: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
401759: 4e
40175a: e8 81 f9 ff ff call 4010e0 <expf@plt>
40175f: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401764: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
40176a: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
40176f: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
401776: e7
401777: e8 64 f9 ff ff call 4010e0 <expf@plt>
40177c: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401781: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
401787: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
40178d: c5 fc 10 44 24 20 vmovups ymm0,YMMWORD PTR [rsp+0x20]
401793: c5 f8 77 vzeroupper
401796: e8 45 f9 ff ff call 4010e0 <expf@plt>
40179b: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4017a0: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
4017a7: f5
4017a8: e8 33 f9 ff ff call 4010e0 <expf@plt>
4017ad: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4017b2: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
4017b8: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4017bd: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
4017c4: 4e
4017c5: e8 16 f9 ff ff call 4010e0 <expf@plt>
4017ca: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4017cf: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
4017d5: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4017da: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
4017e1: e7
4017e2: e8 f9 f8 ff ff call 4010e0 <expf@plt>
4017e7: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4017ec: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
4017f2: c4 e3 7d 18 44 24 40 vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
4017f9: 01
4017fa: c5 fc 10 94 24 80 00 vmovups ymm2,YMMWORD PTR [rsp+0x80]
401801: 00 00
401803: c5 fc 58 c2 vaddps ymm0,ymm0,ymm2
401807: c5 fc 53 c8 vrcpps ymm1,ymm0
40180b: c4 e2 75 ac c2 vfnmadd213ps ymm0,ymm1,ymm2
401810: c4 e2 75 98 c1 vfmadd132ps ymm0,ymm1,ymm1
401815: c4 c1 7c 11 44 9e c0 vmovups YMMWORD PTR [r14+rbx*4-0x40],ymm0
40181c: c5 fc 10 84 24 a0 00 vmovups ymm0,YMMWORD PTR [rsp+0xa0]
401823: 00 00
401825: c4 c1 7c 57 44 9d e0 vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4-0x20]
40182c: c5 fc 11 44 24 20 vmovups YMMWORD PTR [rsp+0x20],ymm0
401832: c4 e3 7d 19 c0 01 vextractf128 xmm0,ymm0,0x1
401838: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
40183e: c5 f8 77 vzeroupper
401841: e8 9a f8 ff ff call 4010e0 <expf@plt>
401846: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
40184b: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
401852: f5
401853: e8 88 f8 ff ff call 4010e0 <expf@plt>
401858: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
40185d: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
401863: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
401868: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
40186f: 4e
401870: e8 6b f8 ff ff call 4010e0 <expf@plt>
401875: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
40187a: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
401880: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
401885: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
40188c: e7
40188d: e8 4e f8 ff ff call 4010e0 <expf@plt>
401892: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401897: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
40189d: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
4018a3: c5 fc 10 44 24 20 vmovups ymm0,YMMWORD PTR [rsp+0x20]
4018a9: c5 f8 77 vzeroupper
4018ac: e8 2f f8 ff ff call 4010e0 <expf@plt>
4018b1: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4018b6: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
4018bd: f5
4018be: e8 1d f8 ff ff call 4010e0 <expf@plt>
4018c3: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4018c8: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
4018ce: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4018d3: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
4018da: 4e
4018db: e8 00 f8 ff ff call 4010e0 <expf@plt>
4018e0: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4018e5: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
4018eb: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4018f0: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
4018f7: e7
4018f8: e8 e3 f7 ff ff call 4010e0 <expf@plt>
4018fd: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401902: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
401908: c4 e3 7d 18 44 24 40 vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
40190f: 01
401910: c5 fc 10 94 24 80 00 vmovups ymm2,YMMWORD PTR [rsp+0x80]
401917: 00 00
401919: c5 fc 58 c2 vaddps ymm0,ymm0,ymm2
40191d: c5 fc 53 c8 vrcpps ymm1,ymm0
401921: c4 e2 75 ac c2 vfnmadd213ps ymm0,ymm1,ymm2
401926: c4 e2 75 98 c1 vfmadd132ps ymm0,ymm1,ymm1
40192b: c4 c1 7c 11 44 9e e0 vmovups YMMWORD PTR [r14+rbx*4-0x20],ymm0
401932: c5 fc 10 84 24 a0 00 vmovups ymm0,YMMWORD PTR [rsp+0xa0]
401939: 00 00
40193b: c4 c1 7c 57 44 9d 00 vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4+0x0]
401942: c5 fc 11 44 24 20 vmovups YMMWORD PTR [rsp+0x20],ymm0
401948: c4 e3 7d 19 c0 01 vextractf128 xmm0,ymm0,0x1
40194e: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
401954: c5 f8 77 vzeroupper
401957: e8 84 f7 ff ff call 4010e0 <expf@plt>
40195c: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
401961: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
401968: f5
401969: e8 72 f7 ff ff call 4010e0 <expf@plt>
40196e: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401973: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
401979: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
40197e: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
401985: 4e
401986: e8 55 f7 ff ff call 4010e0 <expf@plt>
40198b: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401990: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
401996: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
40199b: c4 e3 79 04 44 24 40 vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
4019a2: e7
4019a3: e8 38 f7 ff ff call 4010e0 <expf@plt>
4019a8: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4019ad: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
4019b3: c5 f8 29 44 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm0
4019b9: c5 fc 10 44 24 20 vmovups ymm0,YMMWORD PTR [rsp+0x20]
4019bf: c5 f8 77 vzeroupper
4019c2: e8 19 f7 ff ff call 4010e0 <expf@plt>
4019c7: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4019cc: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
4019d3: f5
4019d4: e8 07 f7 ff ff call 4010e0 <expf@plt>
4019d9: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4019de: c4 e3 71 21 c0 10 vinsertps xmm0,xmm1,xmm0,0x10
4019e4: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
4019e9: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
4019f0: 4e
4019f1: e8 ea f6 ff ff call 4010e0 <expf@plt>
4019f6: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
4019fb: c4 e3 71 21 c0 20 vinsertps xmm0,xmm1,xmm0,0x20
401a01: c5 f8 29 04 24 vmovaps XMMWORD PTR [rsp],xmm0
401a06: c4 e3 79 04 44 24 20 vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
401a0d: e7
401a0e: e8 cd f6 ff ff call 4010e0 <expf@plt>
401a13: c5 f8 28 0c 24 vmovaps xmm1,XMMWORD PTR [rsp]
401a18: c4 e3 71 21 c0 30 vinsertps xmm0,xmm1,xmm0,0x30
401a1e: c4 e3 7d 18 44 24 40 vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
401a25: 01
401a26: c5 fc 10 94 24 80 00 vmovups ymm2,YMMWORD PTR [rsp+0x80]
401a2d: 00 00
401a2f: c5 fc 58 c2 vaddps ymm0,ymm0,ymm2
401a33: c5 fc 53 c8 vrcpps ymm1,ymm0
401a37: c4 e2 75 ac c2 vfnmadd213ps ymm0,ymm1,ymm2
401a3c: c4 e2 75 98 c1 vfmadd132ps ymm0,ymm1,ymm1
401a41: c4 c1 7c 11 04 9e vmovups YMMWORD PTR [r14+rbx*4],ymm0
401a47: 48 83 c3 20 add rbx,0x20
401a4b: 48 81 fb 18 ca 9a 3b cmp rbx,0x3b9aca18
401a52: 0f 85 98 fb ff ff jne 4015f0 <main+0x250>
401a58: c5 f8 77 vzeroupper
401a5b: e8 d0 f5 ff ff call 401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
401a60: 4c 29 e0 sub rax,r12
401a63: 48 b9 db 34 b6 d7 82 movabs rcx,0x431bde82d7b634db
401a6a: de 1b 43
401a6d: 48 f7 e9 imul rcx
401a70: 48 89 d3 mov rbx,rdx
401a73: 48 89 d0 mov rax,rdx
401a76: 48 c1 e8 3f shr rax,0x3f
401a7a: 48 c1 fb 12 sar rbx,0x12
401a7e: 48 01 c3 add rbx,rax
401a81: 48 c7 c5 e0 ff ff ff mov rbp,0xffffffffffffffe0
401a88: e8 a3 f5 ff ff call 401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
401a8d: 49 89 c4 mov r12,rax
401a90: c4 e2 7d 18 05 6b 15 vbroadcastss ymm0,DWORD PTR [rip+0x156b] # 403004 <_IO_stdin_used+0x4>
401a97: 00 00
401a99: c5 fc 11 84 24 40 01 vmovups YMMWORD PTR [rsp+0x140],ymm0
401aa0: 00 00
401aa2: c4 62 7d 18 05 61 15 vbroadcastss ymm8,DWORD PTR [rip+0x1561] # 40300c <_IO_stdin_used+0xc>
401aa9: 00 00
401aab: c4 e2 7d 18 05 5c 15 vbroadcastss ymm0,DWORD PTR [rip+0x155c] # 403010 <_IO_stdin_used+0x10>
401ab2: 00 00
401ab4: c5 fc 11 44 24 20 vmovups YMMWORD PTR [rsp+0x20],ymm0
401aba: c4 e2 7d 18 1d 51 15 vbroadcastss ymm3,DWORD PTR [rip+0x1551] # 403014 <_IO_stdin_used+0x14>
401ac1: 00 00
401ac3: c4 e2 7d 18 05 4c 15 vbroadcastss ymm0,DWORD PTR [rip+0x154c] # 403018 <_IO_stdin_used+0x18>
401aca: 00 00
401acc: c5 fc 11 84 24 a0 00 vmovups YMMWORD PTR [rsp+0xa0],ymm0
401ad3: 00 00
401ad5: c4 e2 7d 18 05 3e 15 vbroadcastss ymm0,DWORD PTR [rip+0x153e] # 40301c <_IO_stdin_used+0x1c>
401adc: 00 00
401ade: c5 fc 11 84 24 20 01 vmovups YMMWORD PTR [rsp+0x120],ymm0
401ae5: 00 00
401ae7: c4 e2 7d 18 05 30 15 vbroadcastss ymm0,DWORD PTR [rip+0x1530] # 403020 <_IO_stdin_used+0x20>
401aee: 00 00
401af0: c5 fc 11 84 24 00 01 vmovups YMMWORD PTR [rsp+0x100],ymm0
401af7: 00 00
401af9: c4 e2 7d 18 05 22 15 vbroadcastss ymm0,DWORD PTR [rip+0x1522] # 403024 <_IO_stdin_used+0x24>
401b00: 00 00
401b02: c5 fc 11 84 24 e0 00 vmovups YMMWORD PTR [rsp+0xe0],ymm0
401b09: 00 00
401b0b: c4 e2 7d 18 05 14 15 vbroadcastss ymm0,DWORD PTR [rip+0x1514] # 403028 <_IO_stdin_used+0x28>
401b12: 00 00
401b14: c5 fc 11 84 24 80 00 vmovups YMMWORD PTR [rsp+0x80],ymm0
401b1b: 00 00
401b1d: c4 e2 7d 18 05 06 15 vbroadcastss ymm0,DWORD PTR [rip+0x1506] # 40302c <_IO_stdin_used+0x2c>
401b24: 00 00
401b26: c5 fc 11 44 24 40 vmovups YMMWORD PTR [rsp+0x40],ymm0
401b2c: 0f 1f 40 00 nop DWORD PTR [rax+0x0]
401b30: 41 0f 18 8c ad 00 01 prefetcht0 BYTE PTR [r13+rbp*4+0x100]
401b37: 00 00
401b39: 41 0f 18 8c ad 80 01 prefetcht0 BYTE PTR [r13+rbp*4+0x180]
401b40: 00 00
401b42: c4 41 7c 28 9c ad 80 vmovaps ymm11,YMMWORD PTR [r13+rbp*4+0x80]
401b49: 00 00 00
401b4c: c4 41 7c 28 ac ad a0 vmovaps ymm13,YMMWORD PTR [r13+rbp*4+0xa0]
401b53: 00 00 00
401b56: c4 c1 7c 28 84 ad c0 vmovaps ymm0,YMMWORD PTR [r13+rbp*4+0xc0]
401b5d: 00 00 00
401b60: c5 fc 11 84 24 60 01 vmovups YMMWORD PTR [rsp+0x160],ymm0
401b67: 00 00
401b69: c5 fc 10 84 24 40 01 vmovups ymm0,YMMWORD PTR [rsp+0x140]
401b70: 00 00
401b72: c5 24 54 f0 vandps ymm14,ymm11,ymm0
401b76: c5 fc 28 d0 vmovaps ymm2,ymm0
401b7a: c4 41 3c c2 fe 02 vcmpleps ymm15,ymm8,ymm14
401b80: c5 7c 10 64 24 20 vmovups ymm12,YMMWORD PTR [rsp+0x20]
401b86: c4 c1 0c c2 e4 01 vcmpltps ymm4,ymm14,ymm12
401b8c: c5 84 54 e4 vandps ymm4,ymm15,ymm4
401b90: c4 41 64 c2 fe 02 vcmpleps ymm15,ymm3,ymm14
401b96: c4 c1 0c c2 c0 01 vcmpltps ymm0,ymm14,ymm8
401b9c: c5 84 54 c0 vandps ymm0,ymm15,ymm0
401ba0: c5 0c c2 fb 01 vcmpltps ymm15,ymm14,ymm3
401ba5: c5 7c 10 8c 24 20 01 vmovups ymm9,YMMWORD PTR [rsp+0x120]
401bac: 00 00
401bae: c5 7c 29 ce vmovaps ymm6,ymm9
401bb2: c5 fc 10 8c 24 a0 00 vmovups ymm1,YMMWORD PTR [rsp+0xa0]
401bb9: 00 00
401bbb: c4 e2 0d a8 f1 vfmadd213ps ymm6,ymm14,ymm1
401bc0: c4 e3 65 4a f6 f0 vblendvps ymm6,ymm3,ymm6,ymm15
401bc6: c5 c0 57 ff vxorps xmm7,xmm7,xmm7
401bca: c5 24 c2 df 01 vcmpltps ymm11,ymm11,ymm7
401bcf: c5 7c 10 94 24 e0 00 vmovups ymm10,YMMWORD PTR [rsp+0xe0]
401bd6: 00 00
401bd8: c4 41 7c 28 fa vmovaps ymm15,ymm10
401bdd: c5 fc 10 ac 24 00 01 vmovups ymm5,YMMWORD PTR [rsp+0x100]
401be4: 00 00
401be6: c4 62 0d a8 fd vfmadd213ps ymm15,ymm14,ymm5
401beb: c5 fc 28 fd vmovaps ymm7,ymm5
401bef: c4 c3 4d 4a c7 00 vblendvps ymm0,ymm6,ymm15,ymm0
401bf5: c5 fc 10 ac 24 80 00 vmovups ymm5,YMMWORD PTR [rsp+0x80]
401bfc: 00 00
401bfe: c4 62 55 98 74 24 40 vfmadd132ps ymm14,ymm5,YMMWORD PTR [rsp+0x40]
401c05: c4 c3 7d 4a c6 40 vblendvps ymm0,ymm0,ymm14,ymm4
401c0b: c5 e4 5c e0 vsubps ymm4,ymm3,ymm0
401c0f: c4 e3 7d 4a c4 b0 vblendvps ymm0,ymm0,ymm4,ymm11
401c15: c5 fc 11 04 24 vmovups YMMWORD PTR [rsp],ymm0
401c1a: c5 7c 28 da vmovaps ymm11,ymm2
401c1e: c5 94 54 c2 vandps ymm0,ymm13,ymm2
401c22: c5 bc c2 e0 02 vcmpleps ymm4,ymm8,ymm0
401c27: c4 c1 7c c2 f4 01 vcmpltps ymm6,ymm0,ymm12
401c2d: c5 cc 54 e4 vandps ymm4,ymm6,ymm4
401c31: c5 e4 c2 f0 02 vcmpleps ymm6,ymm3,ymm0
401c36: c5 7c c2 f3 01 vcmpltps ymm14,ymm0,ymm3
401c3b: c4 41 7c 28 f9 vmovaps ymm15,ymm9
401c40: c4 62 7d a8 f9 vfmadd213ps ymm15,ymm0,ymm1
401c45: c4 43 65 4a f7 e0 vblendvps ymm14,ymm3,ymm15,ymm14
401c4b: c4 41 7c c2 f8 01 vcmpltps ymm15,ymm0,ymm8
401c51: c5 84 54 f6 vandps ymm6,ymm15,ymm6
401c55: c4 41 7c 28 fa vmovaps ymm15,ymm10
401c5a: c4 62 7d a8 ff vfmadd213ps ymm15,ymm0,ymm7
401c5f: c4 c3 0d 4a f7 60 vblendvps ymm6,ymm14,ymm15,ymm6
401c65: c5 14 c2 2c 25 60 30 vcmpltps ymm13,ymm13,YMMWORD PTR ds:0x403060
401c6c: 40 00 01
401c6f: c5 fc 10 4c 24 40 vmovups ymm1,YMMWORD PTR [rsp+0x40]
401c75: c4 e2 75 a8 c5 vfmadd213ps ymm0,ymm1,ymm5
401c7a: c4 e3 4d 4a c0 40 vblendvps ymm0,ymm6,ymm0,ymm4
401c80: c5 e4 5c e0 vsubps ymm4,ymm3,ymm0
401c84: c4 e3 7d 4a c4 d0 vblendvps ymm0,ymm0,ymm4,ymm13
401c8a: c5 fc 11 84 24 c0 00 vmovups YMMWORD PTR [rsp+0xc0],ymm0
401c91: 00 00
401c93: c5 fc 10 94 24 60 01 vmovups ymm2,YMMWORD PTR [rsp+0x160]
401c9a: 00 00
401c9c: c5 a4 54 c2 vandps ymm0,ymm11,ymm2
401ca0: c5 bc c2 e0 02 vcmpleps ymm4,ymm8,ymm0
401ca5: c5 e4 c2 f0 02 vcmpleps ymm6,ymm3,ymm0
401caa: c4 41 7c c2 f0 01 vcmpltps ymm14,ymm0,ymm8
401cb0: c5 8c 54 f6 vandps ymm6,ymm14,ymm6
401cb4: c5 7c c2 f3 01 vcmpltps ymm14,ymm0,ymm3
401cb9: c4 41 7c 28 f9 vmovaps ymm15,ymm9
401cbe: c5 fc 10 ac 24 a0 00 vmovups ymm5,YMMWORD PTR [rsp+0xa0]
401cc5: 00 00
401cc7: c4 62 7d a8 fd vfmadd213ps ymm15,ymm0,ymm5
401ccc: c4 43 65 4a f7 e0 vblendvps ymm14,ymm3,ymm15,ymm14
401cd2: c4 41 7c 28 fa vmovaps ymm15,ymm10
401cd7: c4 62 7d a8 ff vfmadd213ps ymm15,ymm0,ymm7
401cdc: c5 7c 28 ef vmovaps ymm13,ymm7
401ce0: c4 c3 0d 4a f7 60 vblendvps ymm6,ymm14,ymm15,ymm6
401ce6: c4 41 7c c2 f4 01 vcmpltps ymm14,ymm0,ymm12
401cec: c5 8c 54 e4 vandps ymm4,ymm14,ymm4
401cf0: c5 fc 10 bc 24 80 00 vmovups ymm7,YMMWORD PTR [rsp+0x80]
401cf7: 00 00
401cf9: c4 e2 75 a8 c7 vfmadd213ps ymm0,ymm1,ymm7
401cfe: c4 e3 4d 4a c0 40 vblendvps ymm0,ymm6,ymm0,ymm4
401d04: c5 ec c2 24 25 60 30 vcmpltps ymm4,ymm2,YMMWORD PTR ds:0x403060
401d0b: 40 00 01
401d0e: c5 e8 57 d2 vxorps xmm2,xmm2,xmm2
401d12: c5 e4 5c f0 vsubps ymm6,ymm3,ymm0
401d16: c4 e3 7d 4a c6 40 vblendvps ymm0,ymm0,ymm6,ymm4
401d1c: c4 c1 7c 28 a4 ad e0 vmovaps ymm4,YMMWORD PTR [r13+rbp*4+0xe0]
401d23: 00 00 00
401d26: c5 a4 54 f4 vandps ymm6,ymm11,ymm4
401d2a: c5 64 c2 e6 02 vcmpleps ymm12,ymm3,ymm6
401d2f: c5 4c c2 f3 01 vcmpltps ymm14,ymm6,ymm3
401d34: c4 62 4d a8 cd vfmadd213ps ymm9,ymm6,ymm5
401d39: c4 43 65 4a f1 e0 vblendvps ymm14,ymm3,ymm9,ymm14
401d3f: c4 41 4c c2 f8 01 vcmpltps ymm15,ymm6,ymm8
401d45: c4 41 04 54 e4 vandps ymm12,ymm15,ymm12
401d4a: c4 42 4d a8 d5 vfmadd213ps ymm10,ymm6,ymm13
401d4f: c4 43 0d 4a e2 c0 vblendvps ymm12,ymm14,ymm10,ymm12
401d55: c5 3c c2 f6 02 vcmpleps ymm14,ymm8,ymm6
401d5a: c5 4c c2 7c 24 20 01 vcmpltps ymm15,ymm6,YMMWORD PTR [rsp+0x20]
401d61: c4 41 04 54 f6 vandps ymm14,ymm15,ymm14
401d66: c4 e2 75 a8 f7 vfmadd213ps ymm6,ymm1,ymm7
401d6b: c4 e3 1d 4a f6 e0 vblendvps ymm6,ymm12,ymm6,ymm14
401d71: c5 dc c2 e2 01 vcmpltps ymm4,ymm4,ymm2
401d76: c5 64 5c e6 vsubps ymm12,ymm3,ymm6
401d7a: c4 c3 4d 4a e4 40 vblendvps ymm4,ymm6,ymm12,ymm4
401d80: c5 fc 10 14 24 vmovups ymm2,YMMWORD PTR [rsp]
401d85: c4 c1 7c 29 94 af 80 vmovaps YMMWORD PTR [r15+rbp*4+0x80],ymm2
401d8c: 00 00 00
401d8f: c5 fc 10 8c 24 c0 00 vmovups ymm1,YMMWORD PTR [rsp+0xc0]
401d96: 00 00
401d98: c4 c1 7c 29 8c af a0 vmovaps YMMWORD PTR [r15+rbp*4+0xa0],ymm1
401d9f: 00 00 00
401da2: c4 c1 7c 29 84 af c0 vmovaps YMMWORD PTR [r15+rbp*4+0xc0],ymm0
401da9: 00 00 00
401dac: c4 c1 7c 29 a4 af e0 vmovaps YMMWORD PTR [r15+rbp*4+0xe0],ymm4
401db3: 00 00 00
401db6: 48 83 c5 20 add rbp,0x20
401dba: 48 81 fd e0 c9 9a 3b cmp rbp,0x3b9ac9e0
401dc1: 0f 82 69 fd ff ff jb 401b30 <main+0x790>
401dc7: c4 41 38 57 c0 vxorps xmm8,xmm8,xmm8
401dcc: c4 e1 bb 2a c3 vcvtsi2sd xmm0,xmm8,rbx
401dd1: c5 fb 11 04 24 vmovsd QWORD PTR [rsp],xmm0
401dd6: c5 f8 77 vzeroupper
401dd9: e8 52 f2 ff ff call 401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
401dde: 4c 29 e0 sub rax,r12
401de1: 48 b9 db 34 b6 d7 82 movabs rcx,0x431bde82d7b634db
401de8: de 1b 43
401deb: 48 f7 e9 imul rcx
401dee: 48 89 d0 mov rax,rdx
401df1: 48 c1 e8 3f shr rax,0x3f
401df5: 48 c1 fa 12 sar rdx,0x12
401df9: 48 01 c2 add rdx,rax
401dfc: 48 89 94 24 c0 00 00 mov QWORD PTR [rsp+0xc0],rdx
401e03: 00
401e04: b0 01 mov al,0x1
401e06: 31 db xor ebx,ebx
401e08: c4 e2 79 18 0d f3 11 vbroadcastss xmm1,DWORD PTR [rip+0x11f3] # 403004 <_IO_stdin_used+0x4>
401e0f: 00 00
401e11: c5 fa 10 15 1b 12 00 vmovss xmm2,DWORD PTR [rip+0x121b] # 403034 <_IO_stdin_used+0x34>
401e18: 00
401e19: 45 31 e4 xor r12d,r12d
401e1c: c5 f8 29 4c 24 40 vmovaps XMMWORD PTR [rsp+0x40],xmm1
401e22: eb 21 jmp 401e45 <main+0xaa5>
401e24: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
401e2b: 00 00 00
401e2e: 66 90 xchg ax,ax
401e30: 41 ff c4 inc r12d
401e33: 31 c0 xor eax,eax
401e35: 48 ff c3 inc rbx
401e38: 48 81 fb 00 ca 9a 3b cmp rbx,0x3b9aca00
401e3f: 0f 84 1f 01 00 00 je 401f64 <main+0xbc4>
401e45: c4 c1 7a 10 24 9e vmovss xmm4,DWORD PTR [r14+rbx*4]
401e4b: c4 c1 7a 10 1c 9f vmovss xmm3,DWORD PTR [r15+rbx*4]
401e51: c5 da 5c c3 vsubss xmm0,xmm4,xmm3
401e55: c5 f8 54 c1 vandps xmm0,xmm0,xmm1
401e59: c5 f8 2e c2 vucomiss xmm0,xmm2
401e5d: 76 d6 jbe 401e35 <main+0xa95>
401e5f: a8 01 test al,0x1
401e61: 74 cd je 401e30 <main+0xa90>
401e63: bf e0 51 40 00 mov edi,0x4051e0
401e68: be 80 30 40 00 mov esi,0x403080
401e6d: ba 10 00 00 00 mov edx,0x10
401e72: c5 f8 29 44 24 20 vmovaps XMMWORD PTR [rsp+0x20],xmm0
401e78: c5 fa 11 9c 24 a0 00 vmovss DWORD PTR [rsp+0xa0],xmm3
401e7f: 00 00
401e81: c5 fa 11 a4 24 80 00 vmovss DWORD PTR [rsp+0x80],xmm4
401e88: 00 00
401e8a: e8 11 f2 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401e8f: bf e0 51 40 00 mov edi,0x4051e0
401e94: 48 89 de mov rsi,rbx
401e97: e8 c4 f1 ff ff call 401060 <_ZNSo9_M_insertImEERSoT_@plt>
401e9c: 48 89 c5 mov rbp,rax
401e9f: be 91 30 40 00 mov esi,0x403091
401ea4: ba 03 00 00 00 mov edx,0x3
401ea9: 48 89 c7 mov rdi,rax
401eac: e8 ef f1 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401eb1: c4 c1 7a 10 44 9d 00 vmovss xmm0,DWORD PTR [r13+rbx*4+0x0]
401eb8: c5 fa 5a c0 vcvtss2sd xmm0,xmm0,xmm0
401ebc: 48 89 ef mov rdi,rbp
401ebf: e8 0c f2 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
401ec4: 48 89 c5 mov rbp,rax
401ec7: be 95 30 40 00 mov esi,0x403095
401ecc: ba 0a 00 00 00 mov edx,0xa
401ed1: 48 89 c7 mov rdi,rax
401ed4: e8 c7 f1 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401ed9: c5 fa 10 84 24 80 00 vmovss xmm0,DWORD PTR [rsp+0x80]
401ee0: 00 00
401ee2: c5 fa 5a c0 vcvtss2sd xmm0,xmm0,xmm0
401ee6: 48 89 ef mov rdi,rbp
401ee9: e8 e2 f1 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
401eee: 48 89 c5 mov rbp,rax
401ef1: be a0 30 40 00 mov esi,0x4030a0
401ef6: ba 08 00 00 00 mov edx,0x8
401efb: 48 89 c7 mov rdi,rax
401efe: e8 9d f1 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401f03: c5 fa 10 84 24 a0 00 vmovss xmm0,DWORD PTR [rsp+0xa0]
401f0a: 00 00
401f0c: c5 fa 5a c0 vcvtss2sd xmm0,xmm0,xmm0
401f10: 48 89 ef mov rdi,rbp
401f13: e8 b8 f1 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
401f18: 48 89 c5 mov rbp,rax
401f1b: be a9 30 40 00 mov esi,0x4030a9
401f20: ba 0e 00 00 00 mov edx,0xe
401f25: 48 89 c7 mov rdi,rax
401f28: e8 73 f1 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401f2d: c5 f8 28 44 24 20 vmovaps xmm0,XMMWORD PTR [rsp+0x20]
401f33: c5 fa 5a c0 vcvtss2sd xmm0,xmm0,xmm0
401f37: 48 89 ef mov rdi,rbp
401f3a: e8 91 f1 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
401f3f: be e5 30 40 00 mov esi,0x4030e5
401f44: ba 01 00 00 00 mov edx,0x1
401f49: 48 89 c7 mov rdi,rax
401f4c: e8 4f f1 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401f51: c5 fa 10 15 db 10 00 vmovss xmm2,DWORD PTR [rip+0x10db] # 403034 <_IO_stdin_used+0x34>
401f58: 00
401f59: c5 f8 28 4c 24 40 vmovaps xmm1,XMMWORD PTR [rsp+0x40]
401f5f: e9 cc fe ff ff jmp 401e30 <main+0xa90>
401f64: c4 e1 bb 2a 84 24 c0 vcvtsi2sd xmm0,xmm8,QWORD PTR [rsp+0xc0]
401f6b: 00 00 00
401f6e: c5 fb 11 44 24 20 vmovsd QWORD PTR [rsp+0x20],xmm0
401f74: bf c0 50 40 00 mov edi,0x4050c0
401f79: be b8 30 40 00 mov esi,0x4030b8
401f7e: ba 18 00 00 00 mov edx,0x18
401f83: e8 18 f1 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401f88: bf c0 50 40 00 mov edi,0x4050c0
401f8d: be e5 30 40 00 mov esi,0x4030e5
401f92: ba 01 00 00 00 mov edx,0x1
401f97: e8 04 f1 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401f9c: bf c0 50 40 00 mov edi,0x4050c0
401fa1: be d1 30 40 00 mov esi,0x4030d1
401fa6: ba 0b 00 00 00 mov edx,0xb
401fab: e8 f0 f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401fb0: c5 fb 10 05 80 10 00 vmovsd xmm0,QWORD PTR [rip+0x1080] # 403038 <_IO_stdin_used+0x38>
401fb7: 00
401fb8: bf c0 50 40 00 mov edi,0x4050c0
401fbd: e8 0e f1 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
401fc2: be dd 30 40 00 mov esi,0x4030dd
401fc7: ba 09 00 00 00 mov edx,0x9
401fcc: 48 89 c7 mov rdi,rax
401fcf: e8 cc f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401fd4: bf c0 50 40 00 mov edi,0x4050c0
401fd9: be e7 30 40 00 mov esi,0x4030e7
401fde: ba 14 00 00 00 mov edx,0x14
401fe3: e8 b8 f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
401fe8: bf c0 50 40 00 mov edi,0x4050c0
401fed: c5 fb 10 04 24 vmovsd xmm0,QWORD PTR [rsp]
401ff2: e8 d9 f0 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
401ff7: be fc 30 40 00 mov esi,0x4030fc
401ffc: ba 04 00 00 00 mov edx,0x4
402001: 48 89 c7 mov rdi,rax
402004: e8 97 f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
402009: bf c0 50 40 00 mov edi,0x4050c0
40200e: be 01 31 40 00 mov esi,0x403101
402013: ba 17 00 00 00 mov edx,0x17
402018: e8 83 f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
40201d: bf c0 50 40 00 mov edi,0x4050c0
402022: c5 fb 10 44 24 20 vmovsd xmm0,QWORD PTR [rsp+0x20]
402028: e8 a3 f0 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
40202d: be fc 30 40 00 mov esi,0x4030fc
402032: ba 04 00 00 00 mov edx,0x4
402037: 48 89 c7 mov rdi,rax
40203a: e8 61 f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
40203f: bf c0 50 40 00 mov edi,0x4050c0
402044: be 19 31 40 00 mov esi,0x403119
402049: ba 0b 00 00 00 mov edx,0xb
40204e: e8 4d f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
402053: c5 fb 10 04 24 vmovsd xmm0,QWORD PTR [rsp]
402058: c5 fb 5e 44 24 20 vdivsd xmm0,xmm0,QWORD PTR [rsp+0x20]
40205e: bf c0 50 40 00 mov edi,0x4050c0
402063: e8 68 f0 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
402068: be 25 31 40 00 mov esi,0x403125
40206d: ba 02 00 00 00 mov edx,0x2
402072: 48 89 c7 mov rdi,rax
402075: e8 26 f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
40207a: bf c0 50 40 00 mov edi,0x4050c0
40207f: be 28 31 40 00 mov esi,0x403128
402084: ba 0b 00 00 00 mov edx,0xb
402089: e8 12 f0 ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
40208e: b8 00 ca 9a 3b mov eax,0x3b9aca00
402093: 44 29 e0 sub eax,r12d
402096: c4 e1 d3 2a c0 vcvtsi2sd xmm0,xmm5,rax
40209b: c5 fb 59 05 9d 0f 00 vmulsd xmm0,xmm0,QWORD PTR [rip+0xf9d] # 403040 <_IO_stdin_used+0x40>
4020a2: 00
4020a3: bf c0 50 40 00 mov edi,0x4050c0
4020a8: e8 23 f0 ff ff call 4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
4020ad: be e5 30 40 00 mov esi,0x4030e5
4020b2: ba 01 00 00 00 mov edx,0x1
4020b7: 48 89 c7 mov rdi,rax
4020ba: e8 e1 ef ff ff call 4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
4020bf: b8 80 28 6b ee mov eax,0xee6b2880
4020c4: 41 0f ae 7c 05 00 clflush BYTE PTR [r13+rax*1+0x0]
4020ca: b8 00 29 6b ee mov eax,0xee6b2900
4020cf: 41 0f ae 7c 05 00 clflush BYTE PTR [r13+rax*1+0x0]
4020d5: 4c 89 ef mov rdi,r13
4020d8: e8 d3 ef ff ff call 4010b0 <free@plt>
4020dd: 4c 89 f7 mov rdi,r14
4020e0: e8 cb ef ff ff call 4010b0 <free@plt>
4020e5: 4c 89 ff mov rdi,r15
4020e8: e8 c3 ef ff ff call 4010b0 <free@plt>
4020ed: 31 c0 xor eax,eax
4020ef: 48 81 c4 88 01 00 00 add rsp,0x188
4020f6: 5b pop rbx
4020f7: 41 5c pop r12
4020f9: 41 5d pop r13
4020fb: 41 5e pop r14
4020fd: 41 5f pop r15
4020ff: 5d pop rbp
402100: c3 ret
402101: 66 2e 0f 1f 84 00 00 nop WORD PTR cs:[rax+rax*1+0x0]
402108: 00 00 00
40210b: 0f 1f 44 00 00 nop DWORD PTR [rax+rax*1+0x0]
0000000000402110 <__libc_csu_init>:
402110: f3 0f 1e fa endbr64
402114: 41 57 push r15
402116: 4c 8d 3d b3 2c 00 00 lea r15,[rip+0x2cb3] # 404dd0 <__frame_dummy_init_array_entry>
40211d: 41 56 push r14
40211f: 49 89 d6 mov r14,rdx
402122: 41 55 push r13
402124: 49 89 f5 mov r13,rsi
402127: 41 54 push r12
402129: 41 89 fc mov r12d,edi
40212c: 55 push rbp
40212d: 48 8d 2d b4 2c 00 00 lea rbp,[rip+0x2cb4] # 404de8 <__do_global_dtors_aux_fini_array_entry>
402134: 53 push rbx
402135: 4c 29 fd sub rbp,r15
402138: 48 83 ec 08 sub rsp,0x8
40213c: e8 bf ee ff ff call 401000 <_init>
402141: 48 c1 fd 03 sar rbp,0x3
402145: 74 1f je 402166 <__libc_csu_init+0x56>
402147: 31 db xor ebx,ebx
402149: 0f 1f 80 00 00 00 00 nop DWORD PTR [rax+0x0]
402150: 4c 89 f2 mov rdx,r14
402153: 4c 89 ee mov rsi,r13
402156: 44 89 e7 mov edi,r12d
402159: 41 ff 14 df call QWORD PTR [r15+rbx*8]
40215d: 48 83 c3 01 add rbx,0x1
402161: 48 39 dd cmp rbp,rbx
402164: 75 ea jne 402150 <__libc_csu_init+0x40>
402166: 48 83 c4 08 add rsp,0x8
40216a: 5b pop rbx
40216b: 5d pop rbp
40216c: 41 5c pop r12
40216e: 41 5d pop r13
402170: 41 5e pop r14
402172: 41 5f pop r15
402174: c3 ret
402175: 66 66 2e 0f 1f 84 00 data16 nop WORD PTR cs:[rax+rax*1+0x0]
40217c: 00 00 00 00
0000000000402180 <__libc_csu_fini>:
402180: f3 0f 1e fa endbr64
402184: c3 ret
Disassembly of section .fini:
0000000000402188 <_fini>:
402188: f3 0f 1e fa endbr64
40218c: 48 83 ec 08 sub rsp,0x8
402190: 48 83 c4 08 add rsp,0x8
402194: c3 ret