为什么 gcc/clang 选项 -flto 会使我的程序失败？ [已关闭]

Question

我的机器：AMD x86_64 上的 ArchWSL2

gcc 版本：14.2.1

clang版本：18.1.8

我在gcc/clang编译选项中添加了-flto，这使得我的程序在0.000000秒内完成运行。当我删除这个参数时，我的程序似乎正常。图片显示了我的结果。我想知道为什么 -flto 会造成这样的后果。

意想不到：

**gcc with -flto:** 
> Performing performance test...
BLOCK_CIPHER_THROUGHPUT: DES encryption
Execute time: 0.059215 s
Throughpt: 103.073688 Mbps
BLOCK_CIPHER_THROUGHPUT: DES decryption
Execute time: 0.000000 s
Throughpt: 22837.028650 Gbps

**clang with -flto:**
> Performing performance test...
BLOCK_CIPHER_THROUGHPUT: DES encryption
Execute time: 0.000000 s
Throughpt: 20482.695799 Gbps
BLOCK_CIPHER_THROUGHPUT: DES decryption
Execute time: 0.000000 s
Throughpt: 23746.870428 Gbps

正常：

**gcc without -flto:**
> Performing performance test...
BLOCK_CIPHER_THROUGHPUT: DES encryption
Execute time: 0.055438 s
Throughpt: 110.097214 Mbps
BLOCK_CIPHER_THROUGHPUT: DES decryption
Execute time: 0.054398 s
Throughpt: 112.200099 Mbps


**clang without -flto**
> Performing performance test...
BLOCK_CIPHER_THROUGHPUT: DES encryption
Execute time: 0.050459 s
Throughpt: 120.959877 Mbps
BLOCK_CIPHER_THROUGHPUT: DES decryption
Execute time: 0.049857 s
Throughpt: 122.419760 Mbps

我读过gcc关于-flto的选项手册，但这对我来说太难了。所以我问ChatGPT4o为什么我的des算法中添加了-flto编译选项后我的程序执行时间变成了0.00000s？它告诉我

通过积极的优化，编译器可能会检测到您的程序没有产生可观察到的副作用或其结果未被使用。这可能会导致整个执行被优化（例如，如果您的 DES 算法输出未使用或打印，编译器可能会完全删除计算）。
如果您的 DES 算法对常量数据进行操作或执行其结果可以静态确定的计算，则优化器可能会在编译时预先计算结果并删除运行时执行。
如果您使用简单的方法（例如，时间命令或时钟（））测量执行时间，-flto 优化可能会大大减少程序的运行时间，以致测量的时间会向下舍入到 0.00000 秒。

事实上，我不相信AI的答案。这只是一个参考。我想知道为什么-flto会造成这样的效果，或者真的像AI所说的那样吗？受到AI提到的第三点的启发，我给出了基准测试的不完整实现。

#ifndef BENCHMARK_H
#define BENCHMARK_H

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define NSPERS 1000000000

#ifdef __linux__

#elif defined(_WIN32) || defined(_WIN64)

#include <windows.h>

#else

#endif

   /**
    * BPS_BENCH_START
    * Opens a throughput benchmark for a symmetric cipher, reported in
    * (K/M/G)bps.
    *
    * The macro opens a new block scope, declares the per-run timing array and
    * the bookkeeping locals shared with BPS_BENCH_ITEM / BPS_BENCH_FINAL,
    * prints the label, and opens the loop over the benchmark runs. The scope
    * and the loop are left open on purpose: they are closed by
    * BPS_BENCH_FINAL, so the three macros must always be used together.
    *
    * Note: the timing array is named `time_t`, which hides the <time.h>
    * typedef of the same name for the rest of the opened scope.
    *
    * @param[in] LABEL          - the label for this benchmark; it is pasted
    *                             between two string literals, so it must be a
    *                             string literal itself.
    * @param[in] BENCHS         - Number of times each benchmark is ran; also
    *                             used as the size of the timing array.
    */
#define BPS_BENCH_START(_LABEL, _BENCHS)                 \
    {                                                    \
        uint64_t time_t[_BENCHS];                        \
        int benchs_ = _BENCHS;                       \
        int retrys;                                 \
        printf("BLOCK_CIPHER_THROUGHPUT: " _LABEL "\n"); \
        for (int _b = 0; _b < benchs_; _b++){


    /**
     * Measures the throughput of FUNCTION.
     *
     * Must appear between BPS_BENCH_START and BPS_BENCH_FINAL, because it
     * uses the locals `retrys`, `time_t` and `_b` that BPS_BENCH_START
     * declares. FUNCTION is evaluated once outside the timed region
     * (presumably as a warm-up), then ROUNDS more times between
     * time_bench_before() and time_bench_after(); the elapsed time of the
     * timed region is stored in slot `_b` of the timing array.
     *
     * NOTE(review): the value produced by FUNCTION is discarded. If the
     * compiler can see the body of FUNCTION (for example through inlining
     * across translation units) it may be able to hoist or drop the repeated
     * evaluations, making the measured time meaningless - confirm that the
     * work is actually executed when timings look implausibly small.
     *
     * @param[in] FUNCTION      - the function executed.
     * @param[in] ROUNDS        - number of timed evaluations of FUNCTION per
     *                            benchmark run.
     */
#define BPS_BENCH_ITEM(_FUNCTION, _ROUNDS)                  \
    retrys = _ROUNDS;                                  \
    _FUNCTION;                                              \
    time_bench_before();                                    \
    for (int _r = 0; _r < retrys; _r++) { _FUNCTION; } \
    time_bench_after(time_t, _b);


     /**
      * Prints the throughput of FUNCTION  with (K/M/G)bps
      *
      * Closes the loop opened by BPS_BENCH_START, passes the collected
      * timings, the run count and the round count to print_sc_bps(), and then
      * closes the block scope opened by BPS_BENCH_START.
      *
      * @param[in] DATASIZE             -bit length of data input to hash functions or block-size of block ciphers
      */
#define BPS_BENCH_FINAL(_DATASIZE)                               \
    }                                                            \
    print_sc_bps(time_t, benchs_, retrys, (_DATASIZE)); \
    }

#ifdef __cplusplus
extern "C" { /* start of __cplusplus */
#endif

    /** Alias for struct timespec from <time.h>, used for benchmark timestamps. */
    typedef struct timespec time_s;

    /**
     * Measures the time before a benchmark is executed.
     * The timestamp is kept internally until time_bench_after() is called.
     */
    void time_bench_before(void);

    /**
     * Measures the time after a benchmark and stores the elapsed time since
     * the matching time_bench_before() call.
     *
     * @param[out] t    - array receiving the elapsed time, in nanoseconds.
     * @param[in]  i    - index of the element of t that is written.
     */
    void time_bench_after(uint64_t *t, int i);

    /**
     * Prints the last benchmark with bps: the summed execution time in
     * seconds and the throughput scaled to bps, Kbps, Mbps or Gbps.
     *
     * @param[in] t             - elapsed time of each benchmark run, in nanoseconds.
     * @param[in] benches       - number of entries in t; fewer than two is
     *                            reported as an error and nothing else is printed.
     * @param[in] rounds        - number of timed calls per benchmark run.
     * @param[in] block_size    - number of bits processed by one call.
     */
    void print_sc_bps(const uint64_t *t, int benches, int rounds, int block_size);

#ifdef __cplusplus
} /* end of __cplusplus */
#endif

#endif /* !BENCHMARK_H */

#include "benchmark.h"

/*============================================================================*/
/* Time Bench                                                                 */
/*============================================================================*/

#ifdef __linux__

/* Timestamp pair shared by time_bench_before() and time_bench_after(). */
static struct {
    time_s before; /* written by time_bench_before(), just before the timed code */
    time_s after;  /* written by time_bench_after(), just after the timed code */
} g_bench;

/**
 * Computes the interval end - start as a time_s value.
 *
 * @param end       - timestamp taken when the measured interval finished
 * @param start     - timestamp taken when the measured interval began
 * @return the difference; when end's nanosecond field is smaller than
 *         start's, one second is borrowed so the nanosecond field of the
 *         result is not negative
 */
static time_s time_sub(time_s *end, time_s *start)
{
    time_s diff;

    diff.tv_sec = end->tv_sec - start->tv_sec;
    diff.tv_nsec = end->tv_nsec - start->tv_nsec;

    if (diff.tv_nsec < 0)
    {
        /* Borrow one second to bring the nanosecond field back in range. */
        diff.tv_sec -= 1;
        diff.tv_nsec += NSPERS;
    }

    return diff;
}

/* Stores the current CLOCK_MONOTONIC reading as the benchmark start time. */
void time_bench_before()
{
    clock_gettime(CLOCK_MONOTONIC, &g_bench.before);
}

/**
 * Stores the current CLOCK_MONOTONIC reading as the benchmark end time and
 * writes the time elapsed since time_bench_before() into t[i].
 *
 * @param t     - array receiving the elapsed time, in nanoseconds
 * @param i     - index of the element of t to write
 */
void time_bench_after(uint64_t *t, int i)
{
    clock_gettime(CLOCK_MONOTONIC, &g_bench.after);
    time_s elapsed = time_sub(&g_bench.after, &g_bench.before);

    /*
     * Widen the seconds to 64 bits before scaling: tv_sec has type time_t
     * and NSPERS is an int, so on targets with a 32-bit time_t the product
     * would otherwise be computed in 32 bits and overflow after ~2 seconds.
     */
    t[i] = (uint64_t)elapsed.tv_sec * NSPERS + (uint64_t)elapsed.tv_nsec;
}

#elif defined(_WIN32) || defined(_WIN64)

/* Counter pair shared by time_bench_before() and time_bench_after(). */
static struct {
    LARGE_INTEGER before; /* written by time_bench_before(), just before the timed code */
    LARGE_INTEGER after;  /* written by time_bench_after(), just after the timed code */
} g_bench;

/* Stores the current performance-counter reading as the benchmark start. */
void time_bench_before()
{
    QueryPerformanceCounter(&g_bench.before);
}

/**
 * Stores the current performance-counter reading as the benchmark end and
 * writes the time elapsed since time_bench_before() into t[i].
 *
 * @param t     - array receiving the elapsed time, in nanoseconds
 * @param i     - index of the element of t to write
 */
void time_bench_after(uint64_t *t, int i)
{
    QueryPerformanceCounter(&g_bench.after);

    LARGE_INTEGER Frequency;
    QueryPerformanceFrequency(&Frequency);

    int64_t ticks = g_bench.after.QuadPart - g_bench.before.QuadPart;
    int64_t ticks_per_sec = Frequency.QuadPart;

    /*
     * Convert whole seconds and the sub-second remainder separately.
     * Scaling the full tick count by NSPERS first can overflow 64 bits on
     * long intervals; the remainder is always below ticks_per_sec, so its
     * scaled value stays small. The result equals (NSPERS * ticks) /
     * ticks_per_sec whenever that expression does not overflow.
     */
    int64_t whole_ns = (ticks / ticks_per_sec) * NSPERS;
    int64_t frac_ns = ((ticks % ticks_per_sec) * NSPERS) / ticks_per_sec;

    t[i] = (uint64_t)(whole_ns + frac_ns);
}

#else

#endif

/**
 * Prints the summed execution time of a benchmark and its throughput.
 *
 * The throughput is the total number of processed bits divided by the summed
 * time, printed in the first of bps, Kbps, Mbps whose value is below 1000,
 * and in Gbps otherwise. The K/M/G values are obtained by dividing the bit
 * count by 2^10, 2^20 and 2^30 respectively.
 *
 * @param t             - elapsed time of each benchmark run, in nanoseconds
 * @param benches       - number of entries in t; if fewer than two, an error
 *                        is written to stderr and nothing else is printed
 * @param rounds        - number of timed calls per benchmark run
 * @param block_size    - number of bits processed by one call
 */
void print_sc_bps(const uint64_t *t, int benches, int rounds, int block_size)
{
    if (benches < 2)
    {
        fprintf(stderr, "ERROR: Need a least two bench counts!\n");
        return;
    }

    uint64_t acc = 0;

    for (int i = 0; i < benches; i++)
    {
        acc += t[i];
    }

    /*
     * Do the multiplication in 64 bits. All three factors are int, so the
     * product would otherwise be evaluated as an int and overflow (undefined
     * behaviour) for large bench/round counts before being widened.
     */
    uint64_t bits = (uint64_t)((int64_t)benches * rounds * block_size);

    double kbits = (double)bits / (1 << 10);
    double mbits = (double)bits / (1 << 20);
    double gbits = (double)bits / (1 << 30);

    double secend = (double)acc / NSPERS;

    double throughpt_bits_s = (double)bits / secend;  // bits/s
    double throughpt_kbits_s = kbits / secend;        // kbits/s
    double throughpt_mbits_s = mbits / secend;        // mbits/s
    double throughpt_gbits_s = gbits / secend;        // gbits/s

    printf("Execute time: %f s\n", secend);

    /* Pick the smallest unit whose value is still below 1000. */
    if (throughpt_bits_s < 1000)
    {
        printf("Throughpt: %f bps\n", throughpt_bits_s);
    }
    else if (throughpt_kbits_s < 1000)
    {
        printf("Throughpt: %f Kbps\n", throughpt_kbits_s);
    }
    else if (throughpt_mbits_s < 1000)
    {
        printf("Throughpt: %f Mbps\n", throughpt_mbits_s);
    }
    else
    {
        printf("Throughpt: %f Gbps\n", throughpt_gbits_s);
    }

    printf("\n");
}

// Benchmarks the throughput of single-block DES encryption and decryption
// on one fixed plaintext/key pair and prints the results.
//
// NOTE(review): the encryption output is only read by the decryption
// benchmark and the decryption output is never read at all. An optimizer
// that can see into the DES routines may therefore be able to shorten or
// drop the timed loops - confirm the work really runs if the reported
// times look implausibly small.
void test_des_performance()
{
    // Fixed example key  4b41534849534142
    unsigned char key[DES_KEY_SIZE] = { 0x4b,0x41,0x53,0x48,0x49,0x53,0x41,0x42 };
    // Fixed example plaintext 4e45565251554954
    unsigned char plain[DES_BLOCK_SIZE] = { 0x4e,0x45,0x56,0x52,0x51,0x55,0x49,0x54 };

    unsigned char round_keys[16][6];
    unsigned char cipher[DES_BLOCK_SIZE];
    unsigned char recovered[DES_BLOCK_SIZE];

    // The key schedule is computed once, outside the timed regions.
    if (des_make_subkeys(key, round_keys) != 0)
    {
        printf("Failed to generate subkeys.\n");
        return;
    }

    // Encryption throughput
    BPS_BENCH_START("DES encryption", BENCHS);
    BPS_BENCH_ITEM(des_encrypt_block(plain, round_keys, cipher), ROUNDS);
    BPS_BENCH_FINAL(DES_BLOCK_BITS);

    // Decryption throughput, fed with the ciphertext produced above
    BPS_BENCH_START("DES decryption", BENCHS);
    BPS_BENCH_ITEM(des_decrypt_block(cipher, round_keys, recovered), ROUNDS);
    BPS_BENCH_FINAL(DES_BLOCK_BITS);
}

Answer 1

至少存在这个问题：溢出。

`temp.tv_sec * NSPERS` 存在溢出风险（这是 `time_t * int` 的乘法）。最好确保至少使用 64 位运算：

t[i] = temp.tv_sec * (int64_t) NSPERS + temp.tv_nsec;

为什么 gcc/clang 选项 -flto 会使我的程序失败？ [已关闭]

问题描述投票：0回答：1

1个回答

最新问题

为什么 gcc/clang 选项 -flto 会使我的程序失败？ [已关闭]

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1