循环向量化与标量性能对比--sigmoid函数

将10亿条数据应用在 sigmoid 函数上(分段线性近似方式参考:https://zhuanlan.zhihu.com/p/318423774), 验证结论:向量化近似版本的速度平均约为标准库(expf)版本的 5.48 倍(下方 11 次运行的加速比在 5.45~5.54 之间)
1.1 绝对误差 ≤ 0.019(ABSOLUTE_ERROR = 0.019f)

root@sun-PowerEdge-R630:/opt/test# clang++ -O3 -mavx2 -ffast-math -mfma -march=native -o sum sum.cpp
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12882 ms
向量化运算耗时: 2325 ms
加速比: 5.54065x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12772 ms
向量化运算耗时: 2327 ms
加速比: 5.48861x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12720 ms
向量化运算耗时: 2330 ms
加速比: 5.45923x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12707 ms
向量化运算耗时: 2328 ms
加速比: 5.45833x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12706 ms
向量化运算耗时: 2322 ms
加速比: 5.47201x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12695 ms
向量化运算耗时: 2329 ms
加速比: 5.45084x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12713 ms
向量化运算耗时: 2328 ms
加速比: 5.46091x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 13030 ms
向量化运算耗时: 2374 ms
加速比: 5.48863x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12888 ms
向量化运算耗时: 2331 ms
加速比: 5.52896x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12733 ms
向量化运算耗时: 2334 ms
加速比: 5.45544x
通过比: 1
root@sun-PowerEdge-R630:/opt/test# ./sum
===== 性能报告 =====
数据量: 1000 Million
标量运算耗时: 12854 ms
向量化运算耗时: 2344 ms
加速比: 5.48379x
通过比: 1

1.2 计算sigmoid函数

#include <immintrin.h>

#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>

// Benchmark configuration.
// These are typed constants rather than object-like macros so that they obey
// scoping rules and carry an explicit type; the names are unchanged.
constexpr std::size_t DATA_SIZE = 1000000000;  // number of floats processed (one billion)
constexpr std::size_t ALIGNMENT = 32;          // byte alignment requested from _mm_malloc
constexpr int UNROLL_FACTOR = 4;               // loop unroll factor (__m256 vectors per iteration)
constexpr int SIMD_STEP = 8 * UNROLL_FACTOR;   // floats per iteration: 8 lanes * UNROLL_FACTOR = 32
constexpr float ABSOLUTE_ERROR = 0.019f;       // tolerated absolute error |actual - expected|

// Builds a pseudo-random float directly from IEEE-754 single-precision bits.
//
// Layout of the generated pattern:
//   - mantissa: low 23 bits of one rand() draw
//   - exponent: rand() % 254 + 1, i.e. the field is always in [1, 254], which
//     excludes zero/subnormals (field 0) and Inf/NaN (field 255)
//   - sign:     set when a third rand() draw is divisible by 10
//
// Returns a finite, normal float of either sign. Uses the global rand() state,
// so the sequence depends on the seed installed with srand().
float random_float() {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

    uint32_t bits = 0;
    float value = 0.0f;

    do {
        // Draw in separate statements: inside a single expression the order of
        // the two rand() calls would be unspecified.
        const uint32_t mantissa = static_cast<uint32_t>(rand()) & 0x7FFFFFu;
        const uint32_t exponent = static_cast<uint32_t>(rand() % 254 + 1);
        bits = mantissa | (exponent << 23);

        if (rand() % 10 == 0) {
            bits |= 0x80000000u;  // negative sign
        }

        // memcpy is the well-defined way to reinterpret the bit pattern;
        // reading the inactive member of a union is undefined behaviour in C++.
        std::memcpy(&value, &bits, sizeof value);
    } while (!std::isfinite(value));  // defensive: the exponent range already rules out Inf/NaN

    return value;
}

// 向量化Sigmoid函数
__m256 mm256_sigmoid_ps(__m256 x) {
    const __m256 abs_x = _mm256_and_ps(x, _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)));
    const __m256 zero = _mm256_setzero_ps();
    const __m256 one = _mm256_set1_ps(1.0f);
    // 各区间阈值
    const __m256 thr5 = _mm256_set1_ps(5.0f);
    const __m256 thr2_375 = _mm256_set1_ps(2.375f);
    const __m256 thr1 = one;

    // 生成区间掩码
    __m256 mask_2_375 = _mm256_and_ps(
        _mm256_cmp_ps(abs_x, thr2_375, _CMP_GE_OQ),
        _mm256_cmp_ps(abs_x, thr5, _CMP_LT_OQ)
    );
    __m256 mask_1 = _mm256_and_ps(
        _mm256_cmp_ps(abs_x, thr1, _CMP_GE_OQ),
        _mm256_cmp_ps(abs_x, thr2_375, _CMP_LT_OQ)
    );
    __m256 mask_abs0 = _mm256_cmp_ps(abs_x, thr1, _CMP_LT_OQ);
    __m256 mask_0 = _mm256_cmp_ps(x, zero, _CMP_LT_OQ);

    // 分段计算
    __m256 result = _mm256_set1_ps(1.0f);  // |x| >=5

    // 0 <= |x| <1: 0.25*|x| + 0.5
    __m256 p3 = _mm256_fmadd_ps(abs_x, _mm256_set1_ps(0.25f), _mm256_set1_ps(0.5f));
    result = _mm256_blendv_ps(result, p3, mask_abs0);

    // 1 <= |x| <2.375: 0.125*|x| + 0.625
    __m256 p2 = _mm256_fmadd_ps(abs_x, _mm256_set1_ps(0.125f), _mm256_set1_ps(0.625f));
    result = _mm256_blendv_ps(result, p2, mask_1);

    // 2.375 <= |x| <5: 0.03125*|x| + 0.84375
    __m256 p1 = _mm256_fmadd_ps(abs_x, _mm256_set1_ps(0.03125f), _mm256_set1_ps(0.84375f));
    result = _mm256_blendv_ps(result, p1, mask_2_375);

    // 处理负数: 1 - result
    __m256 neg_result = _mm256_sub_ps(one, result);
    return _mm256_blendv_ps(result, neg_result, mask_0);
}

// Benchmark driver.
//
// Fills DATA_SIZE random floats, times the expf-based scalar sigmoid against
// the AVX2 piecewise-linear kernel (mm256_sigmoid_ps), counts the elements
// whose two results differ by more than ABSOLUTE_ERROR, and prints a report.
//
// Returns 0 after printing the report, or EXIT_FAILURE if any of the three
// buffers cannot be allocated.
int main() {
    // The vector loop consumes whole SIMD_STEP blocks and has no scalar tail,
    // so a size that is not a multiple of SIMD_STEP would run past the buffers.
    static_assert(DATA_SIZE % SIMD_STEP == 0, "DATA_SIZE must be a multiple of SIMD_STEP");

    // 1. Allocate aligned buffers (_mm256_load_ps/_mm256_store_ps need 32-byte alignment).
    float* input = static_cast<float*>(_mm_malloc(DATA_SIZE * sizeof(float), ALIGNMENT));
    float* output_scalar = static_cast<float*>(_mm_malloc(DATA_SIZE * sizeof(float), ALIGNMENT));
    float* output_vector = static_cast<float*>(_mm_malloc(DATA_SIZE * sizeof(float), ALIGNMENT));

    const auto release_buffers = [&]() {
        if (input != nullptr) _mm_free(input);
        if (output_scalar != nullptr) _mm_free(output_scalar);
        if (output_vector != nullptr) _mm_free(output_vector);
    };

    // Each buffer is DATA_SIZE * sizeof(float) bytes; bail out cleanly instead
    // of dereferencing a null pointer when the machine cannot provide them.
    if (input == nullptr || output_scalar == nullptr || output_vector == nullptr) {
        std::cerr << "内存分配失败\n";
        release_buffers();
        return EXIT_FAILURE;
    }

    // 2. Initialise the input data.
    srand(static_cast<unsigned int>(time(nullptr)));
    for (size_t i = 0; i < DATA_SIZE; ++i) {
        input[i] = random_float();
    }

    // ================== Scalar version ==================
    const auto start_scalar = std::chrono::high_resolution_clock::now();
    for (size_t i = 0; i < DATA_SIZE; ++i) {
        output_scalar[i] = 1.0f / (1.0f + expf(-input[i]));
    }
    const auto end_scalar = std::chrono::high_resolution_clock::now();
    const double scalar_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(end_scalar - start_scalar).count();

    // ================== Vectorised version ==================
    const auto start_vector = std::chrono::high_resolution_clock::now();
    for (size_t i = 0; i < DATA_SIZE; i += SIMD_STEP) {
        // Software prefetch of the start of the next two blocks. Each request
        // covers one cache line (the original note assumes 64 bytes, i.e. 16
        // floats; check with `getconf LEVEL1_DCACHE_LINESIZE`).
        // Skipped on the last two iterations so no address past the end of
        // `input` is ever formed.
        if (i + 2 * SIMD_STEP < DATA_SIZE) {
            _mm_prefetch(reinterpret_cast<const char*>(input + i + SIMD_STEP), _MM_HINT_T0);
            _mm_prefetch(reinterpret_cast<const char*>(input + i + 2 * SIMD_STEP), _MM_HINT_T0);
        }

        // Load four vectors of eight floats.
        const __m256 x0 = _mm256_load_ps(input + i);
        const __m256 x1 = _mm256_load_ps(input + i + 8);
        const __m256 x2 = _mm256_load_ps(input + i + 16);
        const __m256 x3 = _mm256_load_ps(input + i + 24);

        // Vectorised computation.
        const __m256 res0 = mm256_sigmoid_ps(x0);
        const __m256 res1 = mm256_sigmoid_ps(x1);
        const __m256 res2 = mm256_sigmoid_ps(x2);
        const __m256 res3 = mm256_sigmoid_ps(x3);

        // Store the results.
        _mm256_store_ps(output_vector + i, res0);
        _mm256_store_ps(output_vector + i + 8, res1);
        _mm256_store_ps(output_vector + i + 16, res2);
        _mm256_store_ps(output_vector + i + 24, res3);
    }
    const auto end_vector = std::chrono::high_resolution_clock::now();
    const double vector_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(end_vector - start_vector).count();

    // ================== Verification ==================
    bool verify_ok = true;
    size_t cnt = 0;  // number of elements outside the tolerance
    for (size_t i = 0; i < DATA_SIZE; ++i) {
        const float expected = output_scalar[i];
        const float actual = output_vector[i];

        const float absolute_error = std::fabs(expected - actual);
        if (absolute_error > ABSOLUTE_ERROR) {
            // Only the first mismatch is printed; all of them are counted.
            if (verify_ok) {
                std::cerr << "验证失败: i=" << i
                          << " x=" << input[i]
                          << " expected=" << expected
                          << " actual=" << actual
                          << " 绝对误差=" << absolute_error << "\n";
            }
            verify_ok = false;
            cnt++;
        }
    }

    // ================== Performance report ==================
    std::cout << "===== 性能报告 =====" << "\n";
    std::cout << "数据量: " << DATA_SIZE/1e6 << " Million\n";
    std::cout << "标量运算耗时: " << scalar_time << " ms\n";
    std::cout << "向量化运算耗时: " << vector_time << " ms\n";
    std::cout << "加速比: " << scalar_time / vector_time << "x\n";
    std::cout << "通过比: " << static_cast<double>(DATA_SIZE - cnt) / static_cast<double>(DATA_SIZE) << "\n";

    // The former _mm_clflush calls on addresses past the end of `input` were
    // removed: CLFLUSH is subject to the same access checks as a load, so
    // flushing memory this program does not own can fault, and the bounded
    // prefetch above no longer touches anything outside the buffer.

    // Release the buffers.
    release_buffers();

    return 0;
}

汇编代码

root@sun-PowerEdge-R630:/opt/test# objdump -d sum -M intel

sum:     file format elf64-x86-64


Disassembly of section .init:

0000000000401000 <_init>:
  401000:       f3 0f 1e fa             endbr64
  401004:       48 83 ec 08             sub    rsp,0x8
  401008:       48 8b 05 e9 3f 00 00    mov    rax,QWORD PTR [rip+0x3fe9]        # 404ff8 <__gmon_start__>
  40100f:       48 85 c0                test   rax,rax
  401012:       74 02                   je     401016 <_init+0x16>
  401014:       ff d0                   call   rax
  401016:       48 83 c4 08             add    rsp,0x8
  40101a:       c3                      ret

Disassembly of section .plt:

0000000000401020 <.plt>:
  401020:       ff 35 e2 3f 00 00       push   QWORD PTR [rip+0x3fe2]        # 405008 <_GLOBAL_OFFSET_TABLE_+0x8>
  401026:       ff 25 e4 3f 00 00       jmp    QWORD PTR [rip+0x3fe4]        # 405010 <_GLOBAL_OFFSET_TABLE_+0x10>
  40102c:       0f 1f 40 00             nop    DWORD PTR [rax+0x0]

0000000000401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>:
  401030:       ff 25 e2 3f 00 00       jmp    QWORD PTR [rip+0x3fe2]        # 405018 <_ZNSt6chrono3_V212system_clock3nowEv@GLIBCXX_3.4.19>
  401036:       68 00 00 00 00          push   0x0
  40103b:       e9 e0 ff ff ff          jmp    401020 <.plt>

0000000000401040 <rand@plt>:
  401040:       ff 25 da 3f 00 00       jmp    QWORD PTR [rip+0x3fda]        # 405020 <rand@GLIBC_2.2.5>
  401046:       68 01 00 00 00          push   0x1
  40104b:       e9 d0 ff ff ff          jmp    401020 <.plt>

0000000000401050 <posix_memalign@plt>:
  401050:       ff 25 d2 3f 00 00       jmp    QWORD PTR [rip+0x3fd2]        # 405028 <posix_memalign@GLIBC_2.2.5>
  401056:       68 02 00 00 00          push   0x2
  40105b:       e9 c0 ff ff ff          jmp    401020 <.plt>

0000000000401060 <_ZNSo9_M_insertImEERSoT_@plt>:
  401060:       ff 25 ca 3f 00 00       jmp    QWORD PTR [rip+0x3fca]        # 405030 <_ZNSo9_M_insertImEERSoT_@GLIBCXX_3.4.9>
  401066:       68 03 00 00 00          push   0x3
  40106b:       e9 b0 ff ff ff          jmp    401020 <.plt>

0000000000401070 <__cxa_atexit@plt>:
  401070:       ff 25 c2 3f 00 00       jmp    QWORD PTR [rip+0x3fc2]        # 405038 <__cxa_atexit@GLIBC_2.2.5>
  401076:       68 04 00 00 00          push   0x4
  40107b:       e9 a0 ff ff ff          jmp    401020 <.plt>

0000000000401080 <time@plt>:
  401080:       ff 25 ba 3f 00 00       jmp    QWORD PTR [rip+0x3fba]        # 405040 <time@GLIBC_2.2.5>
  401086:       68 05 00 00 00          push   0x5
  40108b:       e9 90 ff ff ff          jmp    401020 <.plt>

0000000000401090 <srand@plt>:
  401090:       ff 25 b2 3f 00 00       jmp    QWORD PTR [rip+0x3fb2]        # 405048 <srand@GLIBC_2.2.5>
  401096:       68 06 00 00 00          push   0x6
  40109b:       e9 80 ff ff ff          jmp    401020 <.plt>

00000000004010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>:
  4010a0:       ff 25 aa 3f 00 00       jmp    QWORD PTR [rip+0x3faa]        # 405050 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@GLIBCXX_3.4.9>
  4010a6:       68 07 00 00 00          push   0x7
  4010ab:       e9 70 ff ff ff          jmp    401020 <.plt>

00000000004010b0 <free@plt>:
  4010b0:       ff 25 a2 3f 00 00       jmp    QWORD PTR [rip+0x3fa2]        # 405058 <free@GLIBC_2.2.5>
  4010b6:       68 08 00 00 00          push   0x8
  4010bb:       e9 60 ff ff ff          jmp    401020 <.plt>

00000000004010c0 <_ZNSt8ios_base4InitC1Ev@plt>:
  4010c0:       ff 25 9a 3f 00 00       jmp    QWORD PTR [rip+0x3f9a]        # 405060 <_ZNSt8ios_base4InitC1Ev@GLIBCXX_3.4>
  4010c6:       68 09 00 00 00          push   0x9
  4010cb:       e9 50 ff ff ff          jmp    401020 <.plt>

00000000004010d0 <_ZNSo9_M_insertIdEERSoT_@plt>:
  4010d0:       ff 25 92 3f 00 00       jmp    QWORD PTR [rip+0x3f92]        # 405068 <_ZNSo9_M_insertIdEERSoT_@GLIBCXX_3.4.9>
  4010d6:       68 0a 00 00 00          push   0xa
  4010db:       e9 40 ff ff ff          jmp    401020 <.plt>

00000000004010e0 <expf@plt>:
  4010e0:       ff 25 8a 3f 00 00       jmp    QWORD PTR [rip+0x3f8a]        # 405070 <expf@GLIBC_2.27>
  4010e6:       68 0b 00 00 00          push   0xb
  4010eb:       e9 30 ff ff ff          jmp    401020 <.plt>

00000000004010f0 <_ZNSt8ios_base4InitD1Ev@plt>:
  4010f0:       ff 25 82 3f 00 00       jmp    QWORD PTR [rip+0x3f82]        # 405078 <_ZNSt8ios_base4InitD1Ev@GLIBCXX_3.4>
  4010f6:       68 0c 00 00 00          push   0xc
  4010fb:       e9 20 ff ff ff          jmp    401020 <.plt>

Disassembly of section .text:

0000000000401100 <set_fast_math>:
  401100:       f3 0f 1e fa             endbr64
  401104:       0f ae 5c 24 fc          stmxcsr DWORD PTR [rsp-0x4]
  401109:       81 4c 24 fc 40 80 00    or     DWORD PTR [rsp-0x4],0x8040
  401110:       00
  401111:       0f ae 54 24 fc          ldmxcsr DWORD PTR [rsp-0x4]
  401116:       c3                      ret
  401117:       66 0f 1f 84 00 00 00    nop    WORD PTR [rax+rax*1+0x0]
  40111e:       00 00

0000000000401120 <_GLOBAL__sub_I_sum.cpp>:
  401120:       50                      push   rax
  401121:       bf f1 52 40 00          mov    edi,0x4052f1
  401126:       e8 95 ff ff ff          call   4010c0 <_ZNSt8ios_base4InitC1Ev@plt>
  40112b:       bf f0 10 40 00          mov    edi,0x4010f0
  401130:       be f1 52 40 00          mov    esi,0x4052f1
  401135:       ba 88 50 40 00          mov    edx,0x405088
  40113a:       58                      pop    rax
  40113b:       e9 30 ff ff ff          jmp    401070 <__cxa_atexit@plt>

0000000000401140 <_start>:
  401140:       f3 0f 1e fa             endbr64
  401144:       31 ed                   xor    ebp,ebp
  401146:       49 89 d1                mov    r9,rdx
  401149:       5e                      pop    rsi
  40114a:       48 89 e2                mov    rdx,rsp
  40114d:       48 83 e4 f0             and    rsp,0xfffffffffffffff0
  401151:       50                      push   rax
  401152:       54                      push   rsp
  401153:       49 c7 c0 80 21 40 00    mov    r8,0x402180
  40115a:       48 c7 c1 10 21 40 00    mov    rcx,0x402110
  401161:       48 c7 c7 a0 13 40 00    mov    rdi,0x4013a0
  401168:       ff 15 82 3e 00 00       call   QWORD PTR [rip+0x3e82]        # 404ff0 <__libc_start_main@GLIBC_2.2.5>
  40116e:       f4                      hlt
  40116f:       90                      nop

0000000000401170 <_dl_relocate_static_pie>:
  401170:       f3 0f 1e fa             endbr64
  401174:       c3                      ret
  401175:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  40117c:       00 00 00
  40117f:       90                      nop

0000000000401180 <deregister_tm_clones>:
  401180:       b8 90 50 40 00          mov    eax,0x405090
  401185:       48 3d 90 50 40 00       cmp    rax,0x405090
  40118b:       74 13                   je     4011a0 <deregister_tm_clones+0x20>
  40118d:       b8 00 00 00 00          mov    eax,0x0
  401192:       48 85 c0                test   rax,rax
  401195:       74 09                   je     4011a0 <deregister_tm_clones+0x20>
  401197:       bf 90 50 40 00          mov    edi,0x405090
  40119c:       ff e0                   jmp    rax
  40119e:       66 90                   xchg   ax,ax
  4011a0:       c3                      ret
  4011a1:       66 66 2e 0f 1f 84 00    data16 nop WORD PTR cs:[rax+rax*1+0x0]
  4011a8:       00 00 00 00
  4011ac:       0f 1f 40 00             nop    DWORD PTR [rax+0x0]

00000000004011b0 <register_tm_clones>:
  4011b0:       be 90 50 40 00          mov    esi,0x405090
  4011b5:       48 81 ee 90 50 40 00    sub    rsi,0x405090
  4011bc:       48 89 f0                mov    rax,rsi
  4011bf:       48 c1 ee 3f             shr    rsi,0x3f
  4011c3:       48 c1 f8 03             sar    rax,0x3
  4011c7:       48 01 c6                add    rsi,rax
  4011ca:       48 d1 fe                sar    rsi,1
  4011cd:       74 11                   je     4011e0 <register_tm_clones+0x30>
  4011cf:       b8 00 00 00 00          mov    eax,0x0
  4011d4:       48 85 c0                test   rax,rax
  4011d7:       74 07                   je     4011e0 <register_tm_clones+0x30>
  4011d9:       bf 90 50 40 00          mov    edi,0x405090
  4011de:       ff e0                   jmp    rax
  4011e0:       c3                      ret
  4011e1:       66 66 2e 0f 1f 84 00    data16 nop WORD PTR cs:[rax+rax*1+0x0]
  4011e8:       00 00 00 00
  4011ec:       0f 1f 40 00             nop    DWORD PTR [rax+0x0]

00000000004011f0 <__do_global_dtors_aux>:
  4011f0:       f3 0f 1e fa             endbr64
  4011f4:       80 3d f5 40 00 00 00    cmp    BYTE PTR [rip+0x40f5],0x0        # 4052f0 <completed.8061>
  4011fb:       75 13                   jne    401210 <__do_global_dtors_aux+0x20>
  4011fd:       55                      push   rbp
  4011fe:       48 89 e5                mov    rbp,rsp
  401201:       e8 7a ff ff ff          call   401180 <deregister_tm_clones>
  401206:       c6 05 e3 40 00 00 01    mov    BYTE PTR [rip+0x40e3],0x1        # 4052f0 <completed.8061>
  40120d:       5d                      pop    rbp
  40120e:       c3                      ret
  40120f:       90                      nop
  401210:       c3                      ret
  401211:       66 66 2e 0f 1f 84 00    data16 nop WORD PTR cs:[rax+rax*1+0x0]
  401218:       00 00 00 00
  40121c:       0f 1f 40 00             nop    DWORD PTR [rax+0x0]

0000000000401220 <frame_dummy>:
  401220:       f3 0f 1e fa             endbr64
  401224:       eb 8a                   jmp    4011b0 <register_tm_clones>
  401226:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  40122d:       00 00 00

0000000000401230 <_Z12random_floatv>:
  401230:       53                      push   rbx
  401231:       48 83 ec 10             sub    rsp,0x10
  401235:       c4 e2 79 58 05 c6 1d    vpbroadcastd xmm0,DWORD PTR [rip+0x1dc6]        # 403004 <_IO_stdin_used+0x4>
  40123c:       00 00
  40123e:       c5 f9 7f 04 24          vmovdqa XMMWORD PTR [rsp],xmm0
  401243:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  40124a:       00 00 00
  40124d:       0f 1f 00                nop    DWORD PTR [rax]
  401250:       e8 eb fd ff ff          call   401040 <rand@plt>
  401255:       89 c3                   mov    ebx,eax
  401257:       81 e3 ff ff 7f 00       and    ebx,0x7fffff
  40125d:       e8 de fd ff ff          call   401040 <rand@plt>
  401262:       48 98                   cdqe
  401264:       48 69 c8 09 04 02 81    imul   rcx,rax,0xffffffff81020409
  40126b:       48 c1 e9 20             shr    rcx,0x20
  40126f:       01 c1                   add    ecx,eax
  401271:       89 ca                   mov    edx,ecx
  401273:       c1 ea 1f                shr    edx,0x1f
  401276:       c1 f9 07                sar    ecx,0x7
  401279:       01 d1                   add    ecx,edx
  40127b:       89 ca                   mov    edx,ecx
  40127d:       c1 e2 08                shl    edx,0x8
  401280:       89 ce                   mov    esi,ecx
  401282:       29 d6                   sub    esi,edx
  401284:       01 ce                   add    esi,ecx
  401286:       01 c6                   add    esi,eax
  401288:       c1 e6 17                shl    esi,0x17
  40128b:       01 f3                   add    ebx,esi
  40128d:       81 c3 00 00 80 00       add    ebx,0x800000
  401293:       e8 a8 fd ff ff          call   401040 <rand@plt>
  401298:       69 c0 cd cc cc cc       imul   eax,eax,0xcccccccd
  40129e:       05 98 99 99 19          add    eax,0x19999998
  4012a3:       c4 e3 7b f0 c0 01       rorx   eax,eax,0x1
  4012a9:       89 d9                   mov    ecx,ebx
  4012ab:       81 c9 00 00 00 80       or     ecx,0x80000000
  4012b1:       3d 99 99 99 19          cmp    eax,0x19999999
  4012b6:       0f 43 cb                cmovae ecx,ebx
  4012b9:       c5 f9 6e c1             vmovd  xmm0,ecx
  4012bd:       c5 f9 db 0c 24          vpand  xmm1,xmm0,XMMWORD PTR [rsp]
  4012c2:       c5 f8 2e 0d 3e 1d 00    vucomiss xmm1,DWORD PTR [rip+0x1d3e]        # 403008 <_IO_stdin_used+0x8>
  4012c9:       00
  4012ca:       74 84                   je     401250 <_Z12random_floatv+0x20>
  4012cc:       48 83 c4 10             add    rsp,0x10
  4012d0:       5b                      pop    rbx
  4012d1:       c3                      ret
  4012d2:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  4012d9:       00 00 00
  4012dc:       0f 1f 40 00             nop    DWORD PTR [rax+0x0]

00000000004012e0 <_Z16mm256_sigmoid_psDv8_f>:
  4012e0:       c4 e2 7d 18 0d 1b 1d    vbroadcastss ymm1,DWORD PTR [rip+0x1d1b]        # 403004 <_IO_stdin_used+0x4>
  4012e7:       00 00
  4012e9:       c5 fc 54 c9             vandps ymm1,ymm0,ymm1
  4012ed:       c4 e2 7d 18 15 16 1d    vbroadcastss ymm2,DWORD PTR [rip+0x1d16]        # 40300c <_IO_stdin_used+0xc>
  4012f4:       00 00
  4012f6:       c5 ec c2 d9 02          vcmpleps ymm3,ymm2,ymm1
  4012fb:       c4 e2 7d 18 25 0c 1d    vbroadcastss ymm4,DWORD PTR [rip+0x1d0c]        # 403010 <_IO_stdin_used+0x10>
  401302:       00 00
  401304:       c5 f4 c2 e4 01          vcmpltps ymm4,ymm1,ymm4
  401309:       c5 dc 54 db             vandps ymm3,ymm4,ymm3
  40130d:       c4 e2 7d 18 25 fe 1c    vbroadcastss ymm4,DWORD PTR [rip+0x1cfe]        # 403014 <_IO_stdin_used+0x14>
  401314:       00 00
  401316:       c5 dc c2 e9 02          vcmpleps ymm5,ymm4,ymm1
  40131b:       c5 f4 c2 d2 01          vcmpltps ymm2,ymm1,ymm2
  401320:       c4 e2 7d 18 35 ef 1c    vbroadcastss ymm6,DWORD PTR [rip+0x1cef]        # 403018 <_IO_stdin_used+0x18>
  401327:       00 00
  401329:       c4 e2 7d 18 3d ea 1c    vbroadcastss ymm7,DWORD PTR [rip+0x1cea]        # 40301c <_IO_stdin_used+0x1c>
  401330:       00 00
  401332:       c4 e2 75 a8 fe          vfmadd213ps ymm7,ymm1,ymm6
  401337:       c5 f4 c2 f4 01          vcmpltps ymm6,ymm1,ymm4
  40133c:       c4 e3 5d 4a f7 60       vblendvps ymm6,ymm4,ymm7,ymm6
  401342:       c5 ec 54 d5             vandps ymm2,ymm2,ymm5
  401346:       c4 e2 7d 18 2d d1 1c    vbroadcastss ymm5,DWORD PTR [rip+0x1cd1]        # 403020 <_IO_stdin_used+0x20>
  40134d:       00 00
  40134f:       c4 e2 7d 18 3d cc 1c    vbroadcastss ymm7,DWORD PTR [rip+0x1ccc]        # 403024 <_IO_stdin_used+0x24>
  401356:       00 00
  401358:       c4 e2 75 a8 fd          vfmadd213ps ymm7,ymm1,ymm5
  40135d:       c5 d0 57 ed             vxorps xmm5,xmm5,xmm5
  401361:       c4 e3 4d 4a d7 20       vblendvps ymm2,ymm6,ymm7,ymm2
  401367:       c4 e2 7d 18 35 b8 1c    vbroadcastss ymm6,DWORD PTR [rip+0x1cb8]        # 403028 <_IO_stdin_used+0x28>
  40136e:       00 00
  401370:       c4 e2 7d 18 3d b3 1c    vbroadcastss ymm7,DWORD PTR [rip+0x1cb3]        # 40302c <_IO_stdin_used+0x2c>
  401377:       00 00
  401379:       c4 e2 75 a8 fe          vfmadd213ps ymm7,ymm1,ymm6
  40137e:       c4 e3 6d 4a cf 30       vblendvps ymm1,ymm2,ymm7,ymm3
  401384:       c5 fc c2 c5 01          vcmpltps ymm0,ymm0,ymm5
  401389:       c5 dc 5c d1             vsubps ymm2,ymm4,ymm1
  40138d:       c4 e3 75 4a c2 00       vblendvps ymm0,ymm1,ymm2,ymm0
  401393:       c3                      ret
  401394:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  40139b:       00 00 00
  40139e:       66 90                   xchg   ax,ax

00000000004013a0 <main>:
  4013a0:       55                      push   rbp
  4013a1:       41 57                   push   r15
  4013a3:       41 56                   push   r14
  4013a5:       41 55                   push   r13
  4013a7:       41 54                   push   r12
  4013a9:       53                      push   rbx
  4013aa:       48 81 ec 88 01 00 00    sub    rsp,0x188
  4013b1:       48 8d 7c 24 78          lea    rdi,[rsp+0x78]
  4013b6:       be 20 00 00 00          mov    esi,0x20
  4013bb:       ba 00 28 6b ee          mov    edx,0xee6b2800
  4013c0:       e8 8b fc ff ff          call   401050 <posix_memalign@plt>
  4013c5:       45 31 e4                xor    r12d,r12d
  4013c8:       41 bd 00 00 00 00       mov    r13d,0x0
  4013ce:       85 c0                   test   eax,eax
  4013d0:       75 05                   jne    4013d7 <main+0x37>
  4013d2:       4c 8b 6c 24 78          mov    r13,QWORD PTR [rsp+0x78]
  4013d7:       48 8d 7c 24 78          lea    rdi,[rsp+0x78]
  4013dc:       be 20 00 00 00          mov    esi,0x20
  4013e1:       ba 00 28 6b ee          mov    edx,0xee6b2800
  4013e6:       e8 65 fc ff ff          call   401050 <posix_memalign@plt>
  4013eb:       89 44 24 20             mov    DWORD PTR [rsp+0x20],eax
  4013ef:       4c 8b 74 24 78          mov    r14,QWORD PTR [rsp+0x78]
  4013f4:       48 8d 7c 24 78          lea    rdi,[rsp+0x78]
  4013f9:       be 20 00 00 00          mov    esi,0x20
  4013fe:       ba 00 28 6b ee          mov    edx,0xee6b2800
  401403:       e8 48 fc ff ff          call   401050 <posix_memalign@plt>
  401408:       89 c5                   mov    ebp,eax
  40140a:       4c 8b 7c 24 78          mov    r15,QWORD PTR [rsp+0x78]
  40140f:       31 ff                   xor    edi,edi
  401411:       e8 6a fc ff ff          call   401080 <time@plt>
  401416:       89 c7                   mov    edi,eax
  401418:       e8 73 fc ff ff          call   401090 <srand@plt>
  40141d:       0f 1f 00                nop    DWORD PTR [rax]
  401420:       e8 1b fc ff ff          call   401040 <rand@plt>
  401425:       89 c3                   mov    ebx,eax
  401427:       81 e3 ff ff 7f 00       and    ebx,0x7fffff
  40142d:       e8 0e fc ff ff          call   401040 <rand@plt>
  401432:       48 98                   cdqe
  401434:       48 69 c8 09 04 02 81    imul   rcx,rax,0xffffffff81020409
  40143b:       48 c1 e9 20             shr    rcx,0x20
  40143f:       01 c1                   add    ecx,eax
  401441:       89 ca                   mov    edx,ecx
  401443:       c1 ea 1f                shr    edx,0x1f
  401446:       c1 f9 07                sar    ecx,0x7
  401449:       01 d1                   add    ecx,edx
  40144b:       89 ca                   mov    edx,ecx
  40144d:       c1 e2 08                shl    edx,0x8
  401450:       89 ce                   mov    esi,ecx
  401452:       29 d6                   sub    esi,edx
  401454:       01 ce                   add    esi,ecx
  401456:       01 c6                   add    esi,eax
  401458:       c1 e6 17                shl    esi,0x17
  40145b:       01 f3                   add    ebx,esi
  40145d:       81 c3 00 00 80 00       add    ebx,0x800000
  401463:       e8 d8 fb ff ff          call   401040 <rand@plt>
  401468:       69 c0 cd cc cc cc       imul   eax,eax,0xcccccccd
  40146e:       05 98 99 99 19          add    eax,0x19999998
  401473:       c4 e3 7b f0 c8 01       rorx   ecx,eax,0x1
  401479:       89 d8                   mov    eax,ebx
  40147b:       0d 00 00 00 80          or     eax,0x80000000
  401480:       81 f9 99 99 99 19       cmp    ecx,0x19999999
  401486:       0f 43 c3                cmovae eax,ebx
  401489:       89 c1                   mov    ecx,eax
  40148b:       81 e1 ff ff ff 7f       and    ecx,0x7fffffff
  401491:       c5 f9 6e c1             vmovd  xmm0,ecx
  401495:       c5 f8 2e 05 6b 1b 00    vucomiss xmm0,DWORD PTR [rip+0x1b6b]        # 403008 <_IO_stdin_used+0x8>
  40149c:       00
  40149d:       74 81                   je     401420 <main+0x80>
  40149f:       43 89 44 a5 00          mov    DWORD PTR [r13+r12*4+0x0],eax
  4014a4:       49 ff c4                inc    r12
  4014a7:       49 81 fc 00 ca 9a 3b    cmp    r12,0x3b9aca00
  4014ae:       0f 85 6c ff ff ff       jne    401420 <main+0x80>
  4014b4:       31 c0                   xor    eax,eax
  4014b6:       83 7c 24 20 00          cmp    DWORD PTR [rsp+0x20],0x0
  4014bb:       4c 0f 45 f0             cmovne r14,rax
  4014bf:       85 ed                   test   ebp,ebp
  4014c1:       4c 0f 45 f8             cmovne r15,rax
  4014c5:       e8 66 fb ff ff          call   401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
  4014ca:       49 89 c4                mov    r12,rax
  4014cd:       b8 00 28 6b ee          mov    eax,0xee6b2800
  4014d2:       4a 8d 0c 28             lea    rcx,[rax+r13*1]
  4014d6:       49 39 ce                cmp    r14,rcx
  4014d9:       0f 83 da 00 00 00       jae    4015b9 <main+0x219>
  4014df:       4c 01 f0                add    rax,r14
  4014e2:       49 39 c5                cmp    r13,rax
  4014e5:       0f 83 ce 00 00 00       jae    4015b9 <main+0x219>
  4014eb:       bb 03 00 00 00          mov    ebx,0x3
  4014f0:       c4 e2 79 18 05 37 1b    vbroadcastss xmm0,DWORD PTR [rip+0x1b37]        # 403030 <_IO_stdin_used+0x30>
  4014f7:       00 00
  4014f9:       c5 f8 29 44 24 20       vmovaps XMMWORD PTR [rsp+0x20],xmm0
  4014ff:       90                      nop
  401500:       c4 c1 7a 10 44 9d f4    vmovss xmm0,DWORD PTR [r13+rbx*4-0xc]
  401507:       c5 f8 57 44 24 20       vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
  40150d:       e8 ce fb ff ff          call   4010e0 <expf@plt>
  401512:       c5 fa 10 0d fa 1a 00    vmovss xmm1,DWORD PTR [rip+0x1afa]        # 403014 <_IO_stdin_used+0x14>
  401519:       00
  40151a:       c5 fa 58 c1             vaddss xmm0,xmm0,xmm1
  40151e:       c5 f2 5e c0             vdivss xmm0,xmm1,xmm0
  401522:       c4 c1 7a 11 44 9e f4    vmovss DWORD PTR [r14+rbx*4-0xc],xmm0
  401529:       c4 c1 7a 10 44 9d f8    vmovss xmm0,DWORD PTR [r13+rbx*4-0x8]
  401530:       c5 f8 57 44 24 20       vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
  401536:       e8 a5 fb ff ff          call   4010e0 <expf@plt>
  40153b:       c5 fa 10 0d d1 1a 00    vmovss xmm1,DWORD PTR [rip+0x1ad1]        # 403014 <_IO_stdin_used+0x14>
  401542:       00
  401543:       c5 fa 58 c1             vaddss xmm0,xmm0,xmm1
  401547:       c5 f2 5e c0             vdivss xmm0,xmm1,xmm0
  40154b:       c4 c1 7a 11 44 9e f8    vmovss DWORD PTR [r14+rbx*4-0x8],xmm0
  401552:       c4 c1 7a 10 44 9d fc    vmovss xmm0,DWORD PTR [r13+rbx*4-0x4]
  401559:       c5 f8 57 44 24 20       vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
  40155f:       e8 7c fb ff ff          call   4010e0 <expf@plt>
  401564:       c5 fa 10 0d a8 1a 00    vmovss xmm1,DWORD PTR [rip+0x1aa8]        # 403014 <_IO_stdin_used+0x14>
  40156b:       00
  40156c:       c5 fa 58 c1             vaddss xmm0,xmm0,xmm1
  401570:       c5 f2 5e c0             vdivss xmm0,xmm1,xmm0
  401574:       c4 c1 7a 11 44 9e fc    vmovss DWORD PTR [r14+rbx*4-0x4],xmm0
  40157b:       c4 c1 7a 10 44 9d 00    vmovss xmm0,DWORD PTR [r13+rbx*4+0x0]
  401582:       c5 f8 57 44 24 20       vxorps xmm0,xmm0,XMMWORD PTR [rsp+0x20]
  401588:       e8 53 fb ff ff          call   4010e0 <expf@plt>
  40158d:       c5 fa 10 0d 7f 1a 00    vmovss xmm1,DWORD PTR [rip+0x1a7f]        # 403014 <_IO_stdin_used+0x14>
  401594:       00
  401595:       c5 fa 58 c1             vaddss xmm0,xmm0,xmm1
  401599:       c5 f2 5e c0             vdivss xmm0,xmm1,xmm0
  40159d:       c4 c1 7a 11 04 9e       vmovss DWORD PTR [r14+rbx*4],xmm0
  4015a3:       48 83 c3 04             add    rbx,0x4
  4015a7:       48 81 fb 03 ca 9a 3b    cmp    rbx,0x3b9aca03
  4015ae:       0f 85 4c ff ff ff       jne    401500 <main+0x160>
  4015b4:       e9 9f 04 00 00          jmp    401a58 <main+0x6b8>
  4015b9:       bb 18 00 00 00          mov    ebx,0x18
  4015be:       c4 e2 7d 18 05 69 1a    vbroadcastss ymm0,DWORD PTR [rip+0x1a69]        # 403030 <_IO_stdin_used+0x30>
  4015c5:       00 00
  4015c7:       c5 fc 11 84 24 a0 00    vmovups YMMWORD PTR [rsp+0xa0],ymm0
  4015ce:       00 00
  4015d0:       c4 e2 7d 18 05 3b 1a    vbroadcastss ymm0,DWORD PTR [rip+0x1a3b]        # 403014 <_IO_stdin_used+0x14>
  4015d7:       00 00
  4015d9:       c5 fc 11 84 24 80 00    vmovups YMMWORD PTR [rsp+0x80],ymm0
  4015e0:       00 00
  4015e2:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  4015e9:       00 00 00
  4015ec:       0f 1f 40 00             nop    DWORD PTR [rax+0x0]
  4015f0:       c5 fc 10 84 24 a0 00    vmovups ymm0,YMMWORD PTR [rsp+0xa0]
  4015f7:       00 00
  4015f9:       c4 c1 7c 57 44 9d a0    vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4-0x60]
  401600:       c5 fc 11 44 24 20       vmovups YMMWORD PTR [rsp+0x20],ymm0
  401606:       c4 e3 7d 19 c0 01       vextractf128 xmm0,ymm0,0x1
  40160c:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  401612:       c5 f8 77                vzeroupper
  401615:       e8 c6 fa ff ff          call   4010e0 <expf@plt>
  40161a:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  40161f:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
  401626:       f5
  401627:       e8 b4 fa ff ff          call   4010e0 <expf@plt>
  40162c:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401631:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  401637:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  40163c:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
  401643:       4e
  401644:       e8 97 fa ff ff          call   4010e0 <expf@plt>
  401649:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  40164e:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  401654:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  401659:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
  401660:       e7
  401661:       e8 7a fa ff ff          call   4010e0 <expf@plt>
  401666:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  40166b:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  401671:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  401677:       c5 fc 10 44 24 20       vmovups ymm0,YMMWORD PTR [rsp+0x20]
  40167d:       c5 f8 77                vzeroupper
  401680:       e8 5b fa ff ff          call   4010e0 <expf@plt>
  401685:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  40168a:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
  401691:       f5
  401692:       e8 49 fa ff ff          call   4010e0 <expf@plt>
  401697:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  40169c:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  4016a2:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4016a7:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
  4016ae:       4e
  4016af:       e8 2c fa ff ff          call   4010e0 <expf@plt>
  4016b4:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4016b9:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  4016bf:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4016c4:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
  4016cb:       e7
  4016cc:       e8 0f fa ff ff          call   4010e0 <expf@plt>
  4016d1:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4016d6:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  4016dc:       c4 e3 7d 18 44 24 40    vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
  4016e3:       01
  4016e4:       c5 fc 10 94 24 80 00    vmovups ymm2,YMMWORD PTR [rsp+0x80]
  4016eb:       00 00
  4016ed:       c5 fc 58 c2             vaddps ymm0,ymm0,ymm2
  4016f1:       c5 fc 53 c8             vrcpps ymm1,ymm0
  4016f5:       c4 e2 75 ac c2          vfnmadd213ps ymm0,ymm1,ymm2
  4016fa:       c4 e2 75 98 c1          vfmadd132ps ymm0,ymm1,ymm1
  4016ff:       c4 c1 7c 11 44 9e a0    vmovups YMMWORD PTR [r14+rbx*4-0x60],ymm0
  401706:       c5 fc 10 84 24 a0 00    vmovups ymm0,YMMWORD PTR [rsp+0xa0]
  40170d:       00 00
  40170f:       c4 c1 7c 57 44 9d c0    vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4-0x40]
  401716:       c5 fc 11 44 24 20       vmovups YMMWORD PTR [rsp+0x20],ymm0
  40171c:       c4 e3 7d 19 c0 01       vextractf128 xmm0,ymm0,0x1
  401722:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  401728:       c5 f8 77                vzeroupper
  40172b:       e8 b0 f9 ff ff          call   4010e0 <expf@plt>
  401730:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  401735:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
  40173c:       f5
  40173d:       e8 9e f9 ff ff          call   4010e0 <expf@plt>
  401742:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401747:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  40174d:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  401752:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
  401759:       4e
  40175a:       e8 81 f9 ff ff          call   4010e0 <expf@plt>
  40175f:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401764:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  40176a:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  40176f:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
  401776:       e7
  401777:       e8 64 f9 ff ff          call   4010e0 <expf@plt>
  40177c:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401781:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  401787:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  40178d:       c5 fc 10 44 24 20       vmovups ymm0,YMMWORD PTR [rsp+0x20]
  401793:       c5 f8 77                vzeroupper
  401796:       e8 45 f9 ff ff          call   4010e0 <expf@plt>
  40179b:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4017a0:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
  4017a7:       f5
  4017a8:       e8 33 f9 ff ff          call   4010e0 <expf@plt>
  4017ad:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4017b2:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  4017b8:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4017bd:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
  4017c4:       4e
  4017c5:       e8 16 f9 ff ff          call   4010e0 <expf@plt>
  4017ca:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4017cf:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  4017d5:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4017da:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
  4017e1:       e7
  4017e2:       e8 f9 f8 ff ff          call   4010e0 <expf@plt>
  4017e7:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4017ec:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  4017f2:       c4 e3 7d 18 44 24 40    vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
  4017f9:       01
  4017fa:       c5 fc 10 94 24 80 00    vmovups ymm2,YMMWORD PTR [rsp+0x80]
  401801:       00 00
  401803:       c5 fc 58 c2             vaddps ymm0,ymm0,ymm2
  401807:       c5 fc 53 c8             vrcpps ymm1,ymm0
  40180b:       c4 e2 75 ac c2          vfnmadd213ps ymm0,ymm1,ymm2
  401810:       c4 e2 75 98 c1          vfmadd132ps ymm0,ymm1,ymm1
  401815:       c4 c1 7c 11 44 9e c0    vmovups YMMWORD PTR [r14+rbx*4-0x40],ymm0
  40181c:       c5 fc 10 84 24 a0 00    vmovups ymm0,YMMWORD PTR [rsp+0xa0]
  401823:       00 00
  401825:       c4 c1 7c 57 44 9d e0    vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4-0x20]
  40182c:       c5 fc 11 44 24 20       vmovups YMMWORD PTR [rsp+0x20],ymm0
  401832:       c4 e3 7d 19 c0 01       vextractf128 xmm0,ymm0,0x1
  401838:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  40183e:       c5 f8 77                vzeroupper
  401841:       e8 9a f8 ff ff          call   4010e0 <expf@plt>
  401846:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  40184b:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
  401852:       f5
  401853:       e8 88 f8 ff ff          call   4010e0 <expf@plt>
  401858:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  40185d:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  401863:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  401868:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
  40186f:       4e
  401870:       e8 6b f8 ff ff          call   4010e0 <expf@plt>
  401875:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  40187a:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  401880:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  401885:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
  40188c:       e7
  40188d:       e8 4e f8 ff ff          call   4010e0 <expf@plt>
  401892:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401897:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  40189d:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  4018a3:       c5 fc 10 44 24 20       vmovups ymm0,YMMWORD PTR [rsp+0x20]
  4018a9:       c5 f8 77                vzeroupper
  4018ac:       e8 2f f8 ff ff          call   4010e0 <expf@plt>
  4018b1:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4018b6:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
  4018bd:       f5
  4018be:       e8 1d f8 ff ff          call   4010e0 <expf@plt>
  4018c3:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4018c8:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  4018ce:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4018d3:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
  4018da:       4e
  4018db:       e8 00 f8 ff ff          call   4010e0 <expf@plt>
  4018e0:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4018e5:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  4018eb:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4018f0:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
  4018f7:       e7
  4018f8:       e8 e3 f7 ff ff          call   4010e0 <expf@plt>
  4018fd:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401902:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  401908:       c4 e3 7d 18 44 24 40    vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
  40190f:       01
  401910:       c5 fc 10 94 24 80 00    vmovups ymm2,YMMWORD PTR [rsp+0x80]
  401917:       00 00
  401919:       c5 fc 58 c2             vaddps ymm0,ymm0,ymm2
  40191d:       c5 fc 53 c8             vrcpps ymm1,ymm0
  401921:       c4 e2 75 ac c2          vfnmadd213ps ymm0,ymm1,ymm2
  401926:       c4 e2 75 98 c1          vfmadd132ps ymm0,ymm1,ymm1
  40192b:       c4 c1 7c 11 44 9e e0    vmovups YMMWORD PTR [r14+rbx*4-0x20],ymm0
  401932:       c5 fc 10 84 24 a0 00    vmovups ymm0,YMMWORD PTR [rsp+0xa0]
  401939:       00 00
  40193b:       c4 c1 7c 57 44 9d 00    vxorps ymm0,ymm0,YMMWORD PTR [r13+rbx*4+0x0]
  401942:       c5 fc 11 44 24 20       vmovups YMMWORD PTR [rsp+0x20],ymm0
  401948:       c4 e3 7d 19 c0 01       vextractf128 xmm0,ymm0,0x1
  40194e:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  401954:       c5 f8 77                vzeroupper
  401957:       e8 84 f7 ff ff          call   4010e0 <expf@plt>
  40195c:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  401961:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xf5
  401968:       f5
  401969:       e8 72 f7 ff ff          call   4010e0 <expf@plt>
  40196e:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401973:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  401979:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  40197e:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0x4e
  401985:       4e
  401986:       e8 55 f7 ff ff          call   4010e0 <expf@plt>
  40198b:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401990:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  401996:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  40199b:       c4 e3 79 04 44 24 40    vpermilps xmm0,XMMWORD PTR [rsp+0x40],0xe7
  4019a2:       e7
  4019a3:       e8 38 f7 ff ff          call   4010e0 <expf@plt>
  4019a8:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4019ad:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  4019b3:       c5 f8 29 44 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm0
  4019b9:       c5 fc 10 44 24 20       vmovups ymm0,YMMWORD PTR [rsp+0x20]
  4019bf:       c5 f8 77                vzeroupper
  4019c2:       e8 19 f7 ff ff          call   4010e0 <expf@plt>
  4019c7:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4019cc:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xf5
  4019d3:       f5
  4019d4:       e8 07 f7 ff ff          call   4010e0 <expf@plt>
  4019d9:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4019de:       c4 e3 71 21 c0 10       vinsertps xmm0,xmm1,xmm0,0x10
  4019e4:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  4019e9:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0x4e
  4019f0:       4e
  4019f1:       e8 ea f6 ff ff          call   4010e0 <expf@plt>
  4019f6:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  4019fb:       c4 e3 71 21 c0 20       vinsertps xmm0,xmm1,xmm0,0x20
  401a01:       c5 f8 29 04 24          vmovaps XMMWORD PTR [rsp],xmm0
  401a06:       c4 e3 79 04 44 24 20    vpermilps xmm0,XMMWORD PTR [rsp+0x20],0xe7
  401a0d:       e7
  401a0e:       e8 cd f6 ff ff          call   4010e0 <expf@plt>
  401a13:       c5 f8 28 0c 24          vmovaps xmm1,XMMWORD PTR [rsp]
  401a18:       c4 e3 71 21 c0 30       vinsertps xmm0,xmm1,xmm0,0x30
  401a1e:       c4 e3 7d 18 44 24 40    vinsertf128 ymm0,ymm0,XMMWORD PTR [rsp+0x40],0x1
  401a25:       01
  401a26:       c5 fc 10 94 24 80 00    vmovups ymm2,YMMWORD PTR [rsp+0x80]
  401a2d:       00 00
  401a2f:       c5 fc 58 c2             vaddps ymm0,ymm0,ymm2
  401a33:       c5 fc 53 c8             vrcpps ymm1,ymm0
  401a37:       c4 e2 75 ac c2          vfnmadd213ps ymm0,ymm1,ymm2
  401a3c:       c4 e2 75 98 c1          vfmadd132ps ymm0,ymm1,ymm1
  401a41:       c4 c1 7c 11 04 9e       vmovups YMMWORD PTR [r14+rbx*4],ymm0
  401a47:       48 83 c3 20             add    rbx,0x20
  401a4b:       48 81 fb 18 ca 9a 3b    cmp    rbx,0x3b9aca18
  401a52:       0f 85 98 fb ff ff       jne    4015f0 <main+0x250>
  401a58:       c5 f8 77                vzeroupper
  401a5b:       e8 d0 f5 ff ff          call   401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
  401a60:       4c 29 e0                sub    rax,r12
  401a63:       48 b9 db 34 b6 d7 82    movabs rcx,0x431bde82d7b634db
  401a6a:       de 1b 43
  401a6d:       48 f7 e9                imul   rcx
  401a70:       48 89 d3                mov    rbx,rdx
  401a73:       48 89 d0                mov    rax,rdx
  401a76:       48 c1 e8 3f             shr    rax,0x3f
  401a7a:       48 c1 fb 12             sar    rbx,0x12
  401a7e:       48 01 c3                add    rbx,rax
  401a81:       48 c7 c5 e0 ff ff ff    mov    rbp,0xffffffffffffffe0
  401a88:       e8 a3 f5 ff ff          call   401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
  401a8d:       49 89 c4                mov    r12,rax
  401a90:       c4 e2 7d 18 05 6b 15    vbroadcastss ymm0,DWORD PTR [rip+0x156b]        # 403004 <_IO_stdin_used+0x4>
  401a97:       00 00
  401a99:       c5 fc 11 84 24 40 01    vmovups YMMWORD PTR [rsp+0x140],ymm0
  401aa0:       00 00
  401aa2:       c4 62 7d 18 05 61 15    vbroadcastss ymm8,DWORD PTR [rip+0x1561]        # 40300c <_IO_stdin_used+0xc>
  401aa9:       00 00
  401aab:       c4 e2 7d 18 05 5c 15    vbroadcastss ymm0,DWORD PTR [rip+0x155c]        # 403010 <_IO_stdin_used+0x10>
  401ab2:       00 00
  401ab4:       c5 fc 11 44 24 20       vmovups YMMWORD PTR [rsp+0x20],ymm0
  401aba:       c4 e2 7d 18 1d 51 15    vbroadcastss ymm3,DWORD PTR [rip+0x1551]        # 403014 <_IO_stdin_used+0x14>
  401ac1:       00 00
  401ac3:       c4 e2 7d 18 05 4c 15    vbroadcastss ymm0,DWORD PTR [rip+0x154c]        # 403018 <_IO_stdin_used+0x18>
  401aca:       00 00
  401acc:       c5 fc 11 84 24 a0 00    vmovups YMMWORD PTR [rsp+0xa0],ymm0
  401ad3:       00 00
  401ad5:       c4 e2 7d 18 05 3e 15    vbroadcastss ymm0,DWORD PTR [rip+0x153e]        # 40301c <_IO_stdin_used+0x1c>
  401adc:       00 00
  401ade:       c5 fc 11 84 24 20 01    vmovups YMMWORD PTR [rsp+0x120],ymm0
  401ae5:       00 00
  401ae7:       c4 e2 7d 18 05 30 15    vbroadcastss ymm0,DWORD PTR [rip+0x1530]        # 403020 <_IO_stdin_used+0x20>
  401aee:       00 00
  401af0:       c5 fc 11 84 24 00 01    vmovups YMMWORD PTR [rsp+0x100],ymm0
  401af7:       00 00
  401af9:       c4 e2 7d 18 05 22 15    vbroadcastss ymm0,DWORD PTR [rip+0x1522]        # 403024 <_IO_stdin_used+0x24>
  401b00:       00 00
  401b02:       c5 fc 11 84 24 e0 00    vmovups YMMWORD PTR [rsp+0xe0],ymm0
  401b09:       00 00
  401b0b:       c4 e2 7d 18 05 14 15    vbroadcastss ymm0,DWORD PTR [rip+0x1514]        # 403028 <_IO_stdin_used+0x28>
  401b12:       00 00
  401b14:       c5 fc 11 84 24 80 00    vmovups YMMWORD PTR [rsp+0x80],ymm0
  401b1b:       00 00
  401b1d:       c4 e2 7d 18 05 06 15    vbroadcastss ymm0,DWORD PTR [rip+0x1506]        # 40302c <_IO_stdin_used+0x2c>
  401b24:       00 00
  401b26:       c5 fc 11 44 24 40       vmovups YMMWORD PTR [rsp+0x40],ymm0
  401b2c:       0f 1f 40 00             nop    DWORD PTR [rax+0x0]
  401b30:       41 0f 18 8c ad 00 01    prefetcht0 BYTE PTR [r13+rbp*4+0x100]
  401b37:       00 00
  401b39:       41 0f 18 8c ad 80 01    prefetcht0 BYTE PTR [r13+rbp*4+0x180]
  401b40:       00 00
  401b42:       c4 41 7c 28 9c ad 80    vmovaps ymm11,YMMWORD PTR [r13+rbp*4+0x80]
  401b49:       00 00 00
  401b4c:       c4 41 7c 28 ac ad a0    vmovaps ymm13,YMMWORD PTR [r13+rbp*4+0xa0]
  401b53:       00 00 00
  401b56:       c4 c1 7c 28 84 ad c0    vmovaps ymm0,YMMWORD PTR [r13+rbp*4+0xc0]
  401b5d:       00 00 00
  401b60:       c5 fc 11 84 24 60 01    vmovups YMMWORD PTR [rsp+0x160],ymm0
  401b67:       00 00
  401b69:       c5 fc 10 84 24 40 01    vmovups ymm0,YMMWORD PTR [rsp+0x140]
  401b70:       00 00
  401b72:       c5 24 54 f0             vandps ymm14,ymm11,ymm0
  401b76:       c5 fc 28 d0             vmovaps ymm2,ymm0
  401b7a:       c4 41 3c c2 fe 02       vcmpleps ymm15,ymm8,ymm14
  401b80:       c5 7c 10 64 24 20       vmovups ymm12,YMMWORD PTR [rsp+0x20]
  401b86:       c4 c1 0c c2 e4 01       vcmpltps ymm4,ymm14,ymm12
  401b8c:       c5 84 54 e4             vandps ymm4,ymm15,ymm4
  401b90:       c4 41 64 c2 fe 02       vcmpleps ymm15,ymm3,ymm14
  401b96:       c4 c1 0c c2 c0 01       vcmpltps ymm0,ymm14,ymm8
  401b9c:       c5 84 54 c0             vandps ymm0,ymm15,ymm0
  401ba0:       c5 0c c2 fb 01          vcmpltps ymm15,ymm14,ymm3
  401ba5:       c5 7c 10 8c 24 20 01    vmovups ymm9,YMMWORD PTR [rsp+0x120]
  401bac:       00 00
  401bae:       c5 7c 29 ce             vmovaps ymm6,ymm9
  401bb2:       c5 fc 10 8c 24 a0 00    vmovups ymm1,YMMWORD PTR [rsp+0xa0]
  401bb9:       00 00
  401bbb:       c4 e2 0d a8 f1          vfmadd213ps ymm6,ymm14,ymm1
  401bc0:       c4 e3 65 4a f6 f0       vblendvps ymm6,ymm3,ymm6,ymm15
  401bc6:       c5 c0 57 ff             vxorps xmm7,xmm7,xmm7
  401bca:       c5 24 c2 df 01          vcmpltps ymm11,ymm11,ymm7
  401bcf:       c5 7c 10 94 24 e0 00    vmovups ymm10,YMMWORD PTR [rsp+0xe0]
  401bd6:       00 00
  401bd8:       c4 41 7c 28 fa          vmovaps ymm15,ymm10
  401bdd:       c5 fc 10 ac 24 00 01    vmovups ymm5,YMMWORD PTR [rsp+0x100]
  401be4:       00 00
  401be6:       c4 62 0d a8 fd          vfmadd213ps ymm15,ymm14,ymm5
  401beb:       c5 fc 28 fd             vmovaps ymm7,ymm5
  401bef:       c4 c3 4d 4a c7 00       vblendvps ymm0,ymm6,ymm15,ymm0
  401bf5:       c5 fc 10 ac 24 80 00    vmovups ymm5,YMMWORD PTR [rsp+0x80]
  401bfc:       00 00
  401bfe:       c4 62 55 98 74 24 40    vfmadd132ps ymm14,ymm5,YMMWORD PTR [rsp+0x40]
  401c05:       c4 c3 7d 4a c6 40       vblendvps ymm0,ymm0,ymm14,ymm4
  401c0b:       c5 e4 5c e0             vsubps ymm4,ymm3,ymm0
  401c0f:       c4 e3 7d 4a c4 b0       vblendvps ymm0,ymm0,ymm4,ymm11
  401c15:       c5 fc 11 04 24          vmovups YMMWORD PTR [rsp],ymm0
  401c1a:       c5 7c 28 da             vmovaps ymm11,ymm2
  401c1e:       c5 94 54 c2             vandps ymm0,ymm13,ymm2
  401c22:       c5 bc c2 e0 02          vcmpleps ymm4,ymm8,ymm0
  401c27:       c4 c1 7c c2 f4 01       vcmpltps ymm6,ymm0,ymm12
  401c2d:       c5 cc 54 e4             vandps ymm4,ymm6,ymm4
  401c31:       c5 e4 c2 f0 02          vcmpleps ymm6,ymm3,ymm0
  401c36:       c5 7c c2 f3 01          vcmpltps ymm14,ymm0,ymm3
  401c3b:       c4 41 7c 28 f9          vmovaps ymm15,ymm9
  401c40:       c4 62 7d a8 f9          vfmadd213ps ymm15,ymm0,ymm1
  401c45:       c4 43 65 4a f7 e0       vblendvps ymm14,ymm3,ymm15,ymm14
  401c4b:       c4 41 7c c2 f8 01       vcmpltps ymm15,ymm0,ymm8
  401c51:       c5 84 54 f6             vandps ymm6,ymm15,ymm6
  401c55:       c4 41 7c 28 fa          vmovaps ymm15,ymm10
  401c5a:       c4 62 7d a8 ff          vfmadd213ps ymm15,ymm0,ymm7
  401c5f:       c4 c3 0d 4a f7 60       vblendvps ymm6,ymm14,ymm15,ymm6
  401c65:       c5 14 c2 2c 25 60 30    vcmpltps ymm13,ymm13,YMMWORD PTR ds:0x403060
  401c6c:       40 00 01
  401c6f:       c5 fc 10 4c 24 40       vmovups ymm1,YMMWORD PTR [rsp+0x40]
  401c75:       c4 e2 75 a8 c5          vfmadd213ps ymm0,ymm1,ymm5
  401c7a:       c4 e3 4d 4a c0 40       vblendvps ymm0,ymm6,ymm0,ymm4
  401c80:       c5 e4 5c e0             vsubps ymm4,ymm3,ymm0
  401c84:       c4 e3 7d 4a c4 d0       vblendvps ymm0,ymm0,ymm4,ymm13
  401c8a:       c5 fc 11 84 24 c0 00    vmovups YMMWORD PTR [rsp+0xc0],ymm0
  401c91:       00 00
  401c93:       c5 fc 10 94 24 60 01    vmovups ymm2,YMMWORD PTR [rsp+0x160]
  401c9a:       00 00
  401c9c:       c5 a4 54 c2             vandps ymm0,ymm11,ymm2
  401ca0:       c5 bc c2 e0 02          vcmpleps ymm4,ymm8,ymm0
  401ca5:       c5 e4 c2 f0 02          vcmpleps ymm6,ymm3,ymm0
  401caa:       c4 41 7c c2 f0 01       vcmpltps ymm14,ymm0,ymm8
  401cb0:       c5 8c 54 f6             vandps ymm6,ymm14,ymm6
  401cb4:       c5 7c c2 f3 01          vcmpltps ymm14,ymm0,ymm3
  401cb9:       c4 41 7c 28 f9          vmovaps ymm15,ymm9
  401cbe:       c5 fc 10 ac 24 a0 00    vmovups ymm5,YMMWORD PTR [rsp+0xa0]
  401cc5:       00 00
  401cc7:       c4 62 7d a8 fd          vfmadd213ps ymm15,ymm0,ymm5
  401ccc:       c4 43 65 4a f7 e0       vblendvps ymm14,ymm3,ymm15,ymm14
  401cd2:       c4 41 7c 28 fa          vmovaps ymm15,ymm10
  401cd7:       c4 62 7d a8 ff          vfmadd213ps ymm15,ymm0,ymm7
  401cdc:       c5 7c 28 ef             vmovaps ymm13,ymm7
  401ce0:       c4 c3 0d 4a f7 60       vblendvps ymm6,ymm14,ymm15,ymm6
  401ce6:       c4 41 7c c2 f4 01       vcmpltps ymm14,ymm0,ymm12
  401cec:       c5 8c 54 e4             vandps ymm4,ymm14,ymm4
  401cf0:       c5 fc 10 bc 24 80 00    vmovups ymm7,YMMWORD PTR [rsp+0x80]
  401cf7:       00 00
  401cf9:       c4 e2 75 a8 c7          vfmadd213ps ymm0,ymm1,ymm7
  401cfe:       c4 e3 4d 4a c0 40       vblendvps ymm0,ymm6,ymm0,ymm4
  401d04:       c5 ec c2 24 25 60 30    vcmpltps ymm4,ymm2,YMMWORD PTR ds:0x403060
  401d0b:       40 00 01
  401d0e:       c5 e8 57 d2             vxorps xmm2,xmm2,xmm2
  401d12:       c5 e4 5c f0             vsubps ymm6,ymm3,ymm0
  401d16:       c4 e3 7d 4a c6 40       vblendvps ymm0,ymm0,ymm6,ymm4
  401d1c:       c4 c1 7c 28 a4 ad e0    vmovaps ymm4,YMMWORD PTR [r13+rbp*4+0xe0]
  401d23:       00 00 00
  401d26:       c5 a4 54 f4             vandps ymm6,ymm11,ymm4
  401d2a:       c5 64 c2 e6 02          vcmpleps ymm12,ymm3,ymm6
  401d2f:       c5 4c c2 f3 01          vcmpltps ymm14,ymm6,ymm3
  401d34:       c4 62 4d a8 cd          vfmadd213ps ymm9,ymm6,ymm5
  401d39:       c4 43 65 4a f1 e0       vblendvps ymm14,ymm3,ymm9,ymm14
  401d3f:       c4 41 4c c2 f8 01       vcmpltps ymm15,ymm6,ymm8
  401d45:       c4 41 04 54 e4          vandps ymm12,ymm15,ymm12
  401d4a:       c4 42 4d a8 d5          vfmadd213ps ymm10,ymm6,ymm13
  401d4f:       c4 43 0d 4a e2 c0       vblendvps ymm12,ymm14,ymm10,ymm12
  401d55:       c5 3c c2 f6 02          vcmpleps ymm14,ymm8,ymm6
  401d5a:       c5 4c c2 7c 24 20 01    vcmpltps ymm15,ymm6,YMMWORD PTR [rsp+0x20]
  401d61:       c4 41 04 54 f6          vandps ymm14,ymm15,ymm14
  401d66:       c4 e2 75 a8 f7          vfmadd213ps ymm6,ymm1,ymm7
  401d6b:       c4 e3 1d 4a f6 e0       vblendvps ymm6,ymm12,ymm6,ymm14
  401d71:       c5 dc c2 e2 01          vcmpltps ymm4,ymm4,ymm2
  401d76:       c5 64 5c e6             vsubps ymm12,ymm3,ymm6
  401d7a:       c4 c3 4d 4a e4 40       vblendvps ymm4,ymm6,ymm12,ymm4
  401d80:       c5 fc 10 14 24          vmovups ymm2,YMMWORD PTR [rsp]
  401d85:       c4 c1 7c 29 94 af 80    vmovaps YMMWORD PTR [r15+rbp*4+0x80],ymm2
  401d8c:       00 00 00
  401d8f:       c5 fc 10 8c 24 c0 00    vmovups ymm1,YMMWORD PTR [rsp+0xc0]
  401d96:       00 00
  401d98:       c4 c1 7c 29 8c af a0    vmovaps YMMWORD PTR [r15+rbp*4+0xa0],ymm1
  401d9f:       00 00 00
  401da2:       c4 c1 7c 29 84 af c0    vmovaps YMMWORD PTR [r15+rbp*4+0xc0],ymm0
  401da9:       00 00 00
  401dac:       c4 c1 7c 29 a4 af e0    vmovaps YMMWORD PTR [r15+rbp*4+0xe0],ymm4
  401db3:       00 00 00
  401db6:       48 83 c5 20             add    rbp,0x20
  401dba:       48 81 fd e0 c9 9a 3b    cmp    rbp,0x3b9ac9e0
  401dc1:       0f 82 69 fd ff ff       jb     401b30 <main+0x790>
  401dc7:       c4 41 38 57 c0          vxorps xmm8,xmm8,xmm8
  401dcc:       c4 e1 bb 2a c3          vcvtsi2sd xmm0,xmm8,rbx
  401dd1:       c5 fb 11 04 24          vmovsd QWORD PTR [rsp],xmm0
  401dd6:       c5 f8 77                vzeroupper
  401dd9:       e8 52 f2 ff ff          call   401030 <_ZNSt6chrono3_V212system_clock3nowEv@plt>
  401dde:       4c 29 e0                sub    rax,r12
  401de1:       48 b9 db 34 b6 d7 82    movabs rcx,0x431bde82d7b634db
  401de8:       de 1b 43
  401deb:       48 f7 e9                imul   rcx
  401dee:       48 89 d0                mov    rax,rdx
  401df1:       48 c1 e8 3f             shr    rax,0x3f
  401df5:       48 c1 fa 12             sar    rdx,0x12
  401df9:       48 01 c2                add    rdx,rax
  401dfc:       48 89 94 24 c0 00 00    mov    QWORD PTR [rsp+0xc0],rdx
  401e03:       00
  401e04:       b0 01                   mov    al,0x1
  401e06:       31 db                   xor    ebx,ebx
  401e08:       c4 e2 79 18 0d f3 11    vbroadcastss xmm1,DWORD PTR [rip+0x11f3]        # 403004 <_IO_stdin_used+0x4>
  401e0f:       00 00
  401e11:       c5 fa 10 15 1b 12 00    vmovss xmm2,DWORD PTR [rip+0x121b]        # 403034 <_IO_stdin_used+0x34>
  401e18:       00
  401e19:       45 31 e4                xor    r12d,r12d
  401e1c:       c5 f8 29 4c 24 40       vmovaps XMMWORD PTR [rsp+0x40],xmm1
  401e22:       eb 21                   jmp    401e45 <main+0xaa5>
  401e24:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  401e2b:       00 00 00
  401e2e:       66 90                   xchg   ax,ax
  401e30:       41 ff c4                inc    r12d
  401e33:       31 c0                   xor    eax,eax
  401e35:       48 ff c3                inc    rbx
  401e38:       48 81 fb 00 ca 9a 3b    cmp    rbx,0x3b9aca00
  401e3f:       0f 84 1f 01 00 00       je     401f64 <main+0xbc4>
  401e45:       c4 c1 7a 10 24 9e       vmovss xmm4,DWORD PTR [r14+rbx*4]
  401e4b:       c4 c1 7a 10 1c 9f       vmovss xmm3,DWORD PTR [r15+rbx*4]
  401e51:       c5 da 5c c3             vsubss xmm0,xmm4,xmm3
  401e55:       c5 f8 54 c1             vandps xmm0,xmm0,xmm1
  401e59:       c5 f8 2e c2             vucomiss xmm0,xmm2
  401e5d:       76 d6                   jbe    401e35 <main+0xa95>
  401e5f:       a8 01                   test   al,0x1
  401e61:       74 cd                   je     401e30 <main+0xa90>
  401e63:       bf e0 51 40 00          mov    edi,0x4051e0
  401e68:       be 80 30 40 00          mov    esi,0x403080
  401e6d:       ba 10 00 00 00          mov    edx,0x10
  401e72:       c5 f8 29 44 24 20       vmovaps XMMWORD PTR [rsp+0x20],xmm0
  401e78:       c5 fa 11 9c 24 a0 00    vmovss DWORD PTR [rsp+0xa0],xmm3
  401e7f:       00 00
  401e81:       c5 fa 11 a4 24 80 00    vmovss DWORD PTR [rsp+0x80],xmm4
  401e88:       00 00
  401e8a:       e8 11 f2 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401e8f:       bf e0 51 40 00          mov    edi,0x4051e0
  401e94:       48 89 de                mov    rsi,rbx
  401e97:       e8 c4 f1 ff ff          call   401060 <_ZNSo9_M_insertImEERSoT_@plt>
  401e9c:       48 89 c5                mov    rbp,rax
  401e9f:       be 91 30 40 00          mov    esi,0x403091
  401ea4:       ba 03 00 00 00          mov    edx,0x3
  401ea9:       48 89 c7                mov    rdi,rax
  401eac:       e8 ef f1 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401eb1:       c4 c1 7a 10 44 9d 00    vmovss xmm0,DWORD PTR [r13+rbx*4+0x0]
  401eb8:       c5 fa 5a c0             vcvtss2sd xmm0,xmm0,xmm0
  401ebc:       48 89 ef                mov    rdi,rbp
  401ebf:       e8 0c f2 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  401ec4:       48 89 c5                mov    rbp,rax
  401ec7:       be 95 30 40 00          mov    esi,0x403095
  401ecc:       ba 0a 00 00 00          mov    edx,0xa
  401ed1:       48 89 c7                mov    rdi,rax
  401ed4:       e8 c7 f1 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401ed9:       c5 fa 10 84 24 80 00    vmovss xmm0,DWORD PTR [rsp+0x80]
  401ee0:       00 00
  401ee2:       c5 fa 5a c0             vcvtss2sd xmm0,xmm0,xmm0
  401ee6:       48 89 ef                mov    rdi,rbp
  401ee9:       e8 e2 f1 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  401eee:       48 89 c5                mov    rbp,rax
  401ef1:       be a0 30 40 00          mov    esi,0x4030a0
  401ef6:       ba 08 00 00 00          mov    edx,0x8
  401efb:       48 89 c7                mov    rdi,rax
  401efe:       e8 9d f1 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401f03:       c5 fa 10 84 24 a0 00    vmovss xmm0,DWORD PTR [rsp+0xa0]
  401f0a:       00 00
  401f0c:       c5 fa 5a c0             vcvtss2sd xmm0,xmm0,xmm0
  401f10:       48 89 ef                mov    rdi,rbp
  401f13:       e8 b8 f1 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  401f18:       48 89 c5                mov    rbp,rax
  401f1b:       be a9 30 40 00          mov    esi,0x4030a9
  401f20:       ba 0e 00 00 00          mov    edx,0xe
  401f25:       48 89 c7                mov    rdi,rax
  401f28:       e8 73 f1 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401f2d:       c5 f8 28 44 24 20       vmovaps xmm0,XMMWORD PTR [rsp+0x20]
  401f33:       c5 fa 5a c0             vcvtss2sd xmm0,xmm0,xmm0
  401f37:       48 89 ef                mov    rdi,rbp
  401f3a:       e8 91 f1 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  401f3f:       be e5 30 40 00          mov    esi,0x4030e5
  401f44:       ba 01 00 00 00          mov    edx,0x1
  401f49:       48 89 c7                mov    rdi,rax
  401f4c:       e8 4f f1 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401f51:       c5 fa 10 15 db 10 00    vmovss xmm2,DWORD PTR [rip+0x10db]        # 403034 <_IO_stdin_used+0x34>
  401f58:       00
  401f59:       c5 f8 28 4c 24 40       vmovaps xmm1,XMMWORD PTR [rsp+0x40]
  401f5f:       e9 cc fe ff ff          jmp    401e30 <main+0xa90>
  401f64:       c4 e1 bb 2a 84 24 c0    vcvtsi2sd xmm0,xmm8,QWORD PTR [rsp+0xc0]
  401f6b:       00 00 00
  401f6e:       c5 fb 11 44 24 20       vmovsd QWORD PTR [rsp+0x20],xmm0
  401f74:       bf c0 50 40 00          mov    edi,0x4050c0
  401f79:       be b8 30 40 00          mov    esi,0x4030b8
  401f7e:       ba 18 00 00 00          mov    edx,0x18
  401f83:       e8 18 f1 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401f88:       bf c0 50 40 00          mov    edi,0x4050c0
  401f8d:       be e5 30 40 00          mov    esi,0x4030e5
  401f92:       ba 01 00 00 00          mov    edx,0x1
  401f97:       e8 04 f1 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401f9c:       bf c0 50 40 00          mov    edi,0x4050c0
  401fa1:       be d1 30 40 00          mov    esi,0x4030d1
  401fa6:       ba 0b 00 00 00          mov    edx,0xb
  401fab:       e8 f0 f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401fb0:       c5 fb 10 05 80 10 00    vmovsd xmm0,QWORD PTR [rip+0x1080]        # 403038 <_IO_stdin_used+0x38>
  401fb7:       00
  401fb8:       bf c0 50 40 00          mov    edi,0x4050c0
  401fbd:       e8 0e f1 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  401fc2:       be dd 30 40 00          mov    esi,0x4030dd
  401fc7:       ba 09 00 00 00          mov    edx,0x9
  401fcc:       48 89 c7                mov    rdi,rax
  401fcf:       e8 cc f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401fd4:       bf c0 50 40 00          mov    edi,0x4050c0
  401fd9:       be e7 30 40 00          mov    esi,0x4030e7
  401fde:       ba 14 00 00 00          mov    edx,0x14
  401fe3:       e8 b8 f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  401fe8:       bf c0 50 40 00          mov    edi,0x4050c0
  401fed:       c5 fb 10 04 24          vmovsd xmm0,QWORD PTR [rsp]
  401ff2:       e8 d9 f0 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  401ff7:       be fc 30 40 00          mov    esi,0x4030fc
  401ffc:       ba 04 00 00 00          mov    edx,0x4
  402001:       48 89 c7                mov    rdi,rax
  402004:       e8 97 f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  402009:       bf c0 50 40 00          mov    edi,0x4050c0
  40200e:       be 01 31 40 00          mov    esi,0x403101
  402013:       ba 17 00 00 00          mov    edx,0x17
  402018:       e8 83 f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  40201d:       bf c0 50 40 00          mov    edi,0x4050c0
  402022:       c5 fb 10 44 24 20       vmovsd xmm0,QWORD PTR [rsp+0x20]
  402028:       e8 a3 f0 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  40202d:       be fc 30 40 00          mov    esi,0x4030fc
  402032:       ba 04 00 00 00          mov    edx,0x4
  402037:       48 89 c7                mov    rdi,rax
  40203a:       e8 61 f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  40203f:       bf c0 50 40 00          mov    edi,0x4050c0
  402044:       be 19 31 40 00          mov    esi,0x403119
  402049:       ba 0b 00 00 00          mov    edx,0xb
  40204e:       e8 4d f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  402053:       c5 fb 10 04 24          vmovsd xmm0,QWORD PTR [rsp]
  402058:       c5 fb 5e 44 24 20       vdivsd xmm0,xmm0,QWORD PTR [rsp+0x20]
  40205e:       bf c0 50 40 00          mov    edi,0x4050c0
  402063:       e8 68 f0 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  402068:       be 25 31 40 00          mov    esi,0x403125
  40206d:       ba 02 00 00 00          mov    edx,0x2
  402072:       48 89 c7                mov    rdi,rax
  402075:       e8 26 f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  40207a:       bf c0 50 40 00          mov    edi,0x4050c0
  40207f:       be 28 31 40 00          mov    esi,0x403128
  402084:       ba 0b 00 00 00          mov    edx,0xb
  402089:       e8 12 f0 ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  40208e:       b8 00 ca 9a 3b          mov    eax,0x3b9aca00
  402093:       44 29 e0                sub    eax,r12d
  402096:       c4 e1 d3 2a c0          vcvtsi2sd xmm0,xmm5,rax
  40209b:       c5 fb 59 05 9d 0f 00    vmulsd xmm0,xmm0,QWORD PTR [rip+0xf9d]        # 403040 <_IO_stdin_used+0x40>
  4020a2:       00
  4020a3:       bf c0 50 40 00          mov    edi,0x4050c0
  4020a8:       e8 23 f0 ff ff          call   4010d0 <_ZNSo9_M_insertIdEERSoT_@plt>
  4020ad:       be e5 30 40 00          mov    esi,0x4030e5
  4020b2:       ba 01 00 00 00          mov    edx,0x1
  4020b7:       48 89 c7                mov    rdi,rax
  4020ba:       e8 e1 ef ff ff          call   4010a0 <_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@plt>
  4020bf:       b8 80 28 6b ee          mov    eax,0xee6b2880
  4020c4:       41 0f ae 7c 05 00       clflush BYTE PTR [r13+rax*1+0x0]
  4020ca:       b8 00 29 6b ee          mov    eax,0xee6b2900
  4020cf:       41 0f ae 7c 05 00       clflush BYTE PTR [r13+rax*1+0x0]
  4020d5:       4c 89 ef                mov    rdi,r13
  4020d8:       e8 d3 ef ff ff          call   4010b0 <free@plt>
  4020dd:       4c 89 f7                mov    rdi,r14
  4020e0:       e8 cb ef ff ff          call   4010b0 <free@plt>
  4020e5:       4c 89 ff                mov    rdi,r15
  4020e8:       e8 c3 ef ff ff          call   4010b0 <free@plt>
  4020ed:       31 c0                   xor    eax,eax
  4020ef:       48 81 c4 88 01 00 00    add    rsp,0x188
  4020f6:       5b                      pop    rbx
  4020f7:       41 5c                   pop    r12
  4020f9:       41 5d                   pop    r13
  4020fb:       41 5e                   pop    r14
  4020fd:       41 5f                   pop    r15
  4020ff:       5d                      pop    rbp
  402100:       c3                      ret
  402101:       66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  402108:       00 00 00
  40210b:       0f 1f 44 00 00          nop    DWORD PTR [rax+rax*1+0x0]

0000000000402110 <__libc_csu_init>:
  402110:       f3 0f 1e fa             endbr64
  402114:       41 57                   push   r15
  402116:       4c 8d 3d b3 2c 00 00    lea    r15,[rip+0x2cb3]        # 404dd0 <__frame_dummy_init_array_entry>
  40211d:       41 56                   push   r14
  40211f:       49 89 d6                mov    r14,rdx
  402122:       41 55                   push   r13
  402124:       49 89 f5                mov    r13,rsi
  402127:       41 54                   push   r12
  402129:       41 89 fc                mov    r12d,edi
  40212c:       55                      push   rbp
  40212d:       48 8d 2d b4 2c 00 00    lea    rbp,[rip+0x2cb4]        # 404de8 <__do_global_dtors_aux_fini_array_entry>
  402134:       53                      push   rbx
  402135:       4c 29 fd                sub    rbp,r15
  402138:       48 83 ec 08             sub    rsp,0x8
  40213c:       e8 bf ee ff ff          call   401000 <_init>
  402141:       48 c1 fd 03             sar    rbp,0x3
  402145:       74 1f                   je     402166 <__libc_csu_init+0x56>
  402147:       31 db                   xor    ebx,ebx
  402149:       0f 1f 80 00 00 00 00    nop    DWORD PTR [rax+0x0]
  402150:       4c 89 f2                mov    rdx,r14
  402153:       4c 89 ee                mov    rsi,r13
  402156:       44 89 e7                mov    edi,r12d
  402159:       41 ff 14 df             call   QWORD PTR [r15+rbx*8]
  40215d:       48 83 c3 01             add    rbx,0x1
  402161:       48 39 dd                cmp    rbp,rbx
  402164:       75 ea                   jne    402150 <__libc_csu_init+0x40>
  402166:       48 83 c4 08             add    rsp,0x8
  40216a:       5b                      pop    rbx
  40216b:       5d                      pop    rbp
  40216c:       41 5c                   pop    r12
  40216e:       41 5d                   pop    r13
  402170:       41 5e                   pop    r14
  402172:       41 5f                   pop    r15
  402174:       c3                      ret
  402175:       66 66 2e 0f 1f 84 00    data16 nop WORD PTR cs:[rax+rax*1+0x0]
  40217c:       00 00 00 00

0000000000402180 <__libc_csu_fini>:
  402180:       f3 0f 1e fa             endbr64
  402184:       c3                      ret

Disassembly of section .fini:

0000000000402188 <_fini>:
  402188:       f3 0f 1e fa             endbr64
  40218c:       48 83 ec 08             sub    rsp,0x8
  402190:       48 83 c4 08             add    rsp,0x8
  402194:       c3                      ret
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

  • 来源:https://zhuanlan.zhihu.com/p/82105066[https://zhuanlan...
    mylaf阅读 3,547评论 0 0
  • 接触机器学习时间也不短了, 趁国庆放假, 做一下深度整理. 1. 大纲 若想在企业胜任算法相关岗位知识, 除了掌握...
    婉妃阅读 8,702评论 2 92
  • 损失函数与评价指标之间的关系[https://zhuanlan.zhihu.com/p/67469129] 1 P...
    RawLychee阅读 8,218评论 0 0
  • 0. 前言 近两年学术界对Transformer在CV上的应用可谓异常青睐,这里重点强调学术界的原因是目前工业界还...
    mrhalyang阅读 7,085评论 0 0
  • 说明 本文对深度学习中的重要组件——激活函数做系统性汇总。 了解激活函数 什么是激活函数 在神经网络中,一个节点的...
    氵也氵寿阅读 8,614评论 0 6

友情链接更多精彩内容