如何在迭代结构数组时从 Lua 获得更好的性能

问题描述 投票:0回答:1

问题: 在 Lua 中迭代结构体数组并修改其中的数据,这些数据稍后会在 C++ 中使用。

背景故事: 这周我做了一些性能测试,结果对 Lua 的性能有点失望。我的困扰始于把 Lua 作为脚本接口集成到我的游戏引擎中。为了简单起见,我一开始使用 luabridge,但由于 sol2 有一些便利的功能,我很快就切换到了 sol2。随后我第一次测量了性能,结果糟糕得令我震惊。

测试用例: 我提取了一个独立的测试用例(见下方代码),用来比较原生 C++ 与 sol2 的性能,结果同样糟糕。随后我又添加了一个测试用例,使用原生 Lua C API 和 Light Userdata 执行相同的操作。性能好了一些,但仍远远不够好,如下所示。

时间:

C++                  elapsed time: 0.002736s
Sol (Container)      elapsed time: 0.999166s
Lua (Light Userdata) elapsed time: 0.338946s

问题: 对于这样的用例,这样的结果是意料之中的,还是有机会接近原生 C++ 的性能?

信息:

  • LuaJit(最新主分支)
  • sol2(最新主分支)
  • 编译器:MSVC19
  • 操作系统:Windows 11

代码:

#define SOL_ALL_SAFETIES_ON  0
#define SOL_USING_CXX_LUAJIT 1
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

#include <sol/sol.hpp>


// Plain per-object transform record: three position components followed by
// three scale components, all single-precision floats.
//
// The same field order and types are declared a second time inside the Lua
// `ffi.cdef` block of the FFI benchmark, which reinterprets raw pointers to
// this struct. Any change here must be mirrored there.
struct Transform
{
    float position_x;
    float position_y;
    float position_z;
    float scale_x;
    float scale_y;
    float scale_z;
};

// Break the build if padding or an extra member ever changes the size, since
// the Lua side indexes the array with its own copy of this layout.
static_assert( sizeof( Transform ) == 6 * sizeof( float ),
               "Transform must stay six tightly packed floats to match the Lua ffi.cdef layout" );

// Base address of the Transform array shared by the benchmarks in this file.
// Assigned in main() before the benchmarks run; null until then.
Transform* p_transforms = nullptr;


std::vector<Transform*> GetTransformPointerArray( int32_t count )
{
    std::vector<Transform*> transform_pointers( count );
    for( int i = 0; i < transform_pointers.size(); ++i )
        transform_pointers[ i ] = &p_transforms[ i ];

    return transform_pointers;
}


void c_Update( int32_t count )
{
    for( int i = 0; i < count; ++i )
    {
        Transform* p_transform = &p_transforms[ i ];

        p_transform->position_x += 0.01f;
        p_transform->scale_x += 0.01f;
    }
}


// Times `iterations` calls of c_Update( count ) and prints the elapsed
// wall-clock time in seconds.
void c_perf_test( int32_t iterations, int32_t count )
{
    using clock = std::chrono::high_resolution_clock;

    const clock::time_point begin = clock::now();

    for( int pass = 0; pass < iterations; ++pass )
        c_Update( count );

    const clock::time_point finish = clock::now();

    // duration<double> with the default period expresses the span in seconds.
    const std::chrono::duration<double> seconds = finish - begin;

    printf( "C++                  elapsed time: %fs\n", seconds.count() );
}


void sol_perf_test( int32_t iterations, int32_t count )
{
    sol::state lua;
    lua.open_libraries();

    lua.new_usertype<Transform>( "Transform",
                                 "position_x", &Transform::position_x,
                                 "position_y", &Transform::position_y,
                                 "position_z", &Transform::position_z,
                                 "scale_x", &Transform::scale_x,
                                 "scale_y", &Transform::scale_y,
                                 "scale_z", &Transform::scale_z );

    lua.script( R"(
        function Update( transforms )
            for i = 1, #transforms, 1 do
                local transform = transforms[i]

                local position_x = transform.position_x
                local scale_x    = transform.scale_x

                position_x = position_x + 0.01
                scale_x    = scale_x + 0.01

                transform.position_x = position_x
                transform.scale_x    = scale_x
            end
        end
    )" );

    sol::function update_func = lua[ "Update" ];

    std::vector<Transform*> transform_pointers = GetTransformPointerArray( count );

    auto start = std::chrono::high_resolution_clock::now();

    for( int i = 0; i < iterations; ++i )
        update_func( transform_pointers );

    auto                          end             = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end - start;
    double                        elapsed         = elapsed_seconds.count();

    printf( "Sol (Container)      elapsed time: %fs\n", elapsed );
}


// Lua C function: pushes the global `p_transforms` base pointer as a light
// userdata and returns it as the single result.
static int get_light_transform_array( lua_State* L )
{
    void* const array_base = p_transforms;
    lua_pushlightuserdata( L, array_base );
    return 1;
}


// Lua C function: given a light userdata array base (stack slot 2) and a
// 1-based index (stack slot 3), pushes a light userdata pointing at that
// element. Slot 1 is the library table, because the scripts in this file
// invoke the function with the colon-call syntax.
// The userdata argument is not validated and the index is not range-checked.
static int get_light_transform( lua_State* L )
{
    Transform* const array_base      = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    const int        one_based_index = luaL_checkint( L, 3 );

    // Lua counts from 1, the C array from 0.
    Transform* const element = array_base + ( one_based_index - 1 );

    lua_pushlightuserdata( L, element );
    return 1;
}


// Lua C function: reads position_x from the Transform light userdata in
// stack slot 2 and returns it as a Lua number. The argument is not validated.
static int get_position_x( lua_State* L )
{
    const Transform* const transform = static_cast<const Transform*>( lua_touserdata( L, 2 ) );

    lua_pushnumber( L, transform->position_x );
    return 1;
}


// Lua C function: stores the number in stack slot 3 into position_x of the
// Transform light userdata in stack slot 2. Returns no values to Lua.
// The arguments are not validated.
static int set_position_x( lua_State* L )
{
    Transform* const transform = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    const lua_Number value     = lua_tonumber( L, 3 );

    // Lua numbers are wider than the float field; narrow explicitly.
    transform->position_x = static_cast<float>( value );
    return 0;
}


// Lua C function: reads scale_x from the Transform light userdata in stack
// slot 2 and returns it as a Lua number. The argument is not validated.
static int get_scale_x( lua_State* L )
{
    const Transform* const transform = static_cast<const Transform*>( lua_touserdata( L, 2 ) );

    lua_pushnumber( L, transform->scale_x );
    return 1;
}


// Lua C function: stores the number in stack slot 3 into scale_x of the
// Transform light userdata in stack slot 2. Returns no values to Lua.
// The arguments are not validated.
static int set_scale_x( lua_State* L )
{
    Transform* const transform = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    const lua_Number value     = lua_tonumber( L, 3 );

    // Lua numbers are wider than the float field; narrow explicitly.
    transform->scale_x = static_cast<float>( value );
    return 0;
}


// Registers the light-userdata accessor functions in a global Lua table named
// "Transform". The Lua stack is left exactly as it was found.
static void create_transform_library( lua_State* L )
{
    // Name/function pairs, terminated by a null sentinel entry.
    static const luaL_Reg transform_library[] = {
        { "GetLightTransformArray", get_light_transform_array },
        { "GetLightTransform", get_light_transform },
        { "GetPositionX", get_position_x },
        { "SetPositionX", set_position_x },
        { "GetScaleX", get_scale_x },
        { "SetScaleX", set_scale_x },
        { nullptr, nullptr }
    };

    luaL_openlib( L, "Transform", transform_library, 0 );

    // luaL_openlib leaves the library table on top of the stack. Nothing here
    // needs it, so pop it instead of leaking a stack slot into the caller.
    lua_pop( L, 1 );
}


void lightuserdata_perf_test( int32_t iterations, int32_t count )
{
    lua_State* p_lua = luaL_newstate();
    luaL_openlibs( p_lua );

    create_transform_library( p_lua );

    int status = luaL_dostring( p_lua, R"(
        function Update( count )
            local transforms = Transform:GetLightTransformArray()

            for i = 1, count, 1 do
                local light_transform = Transform:GetLightTransform( transforms, i )
                local position_x      = Transform:GetPositionX( light_transform )
                local scale_x         = Transform:GetScaleX( light_transform )

                position_x = position_x + 0.01
                scale_x    = scale_x + 0.01

                Transform:SetPositionX( light_transform, position_x )
                Transform:SetScaleX( light_transform, scale_x )
            end
        end
    )" );

    if( status != 0 )
    {
        printf( "Error: %s\n", lua_tostring( p_lua, -1 ) );
        return;
    }

    auto start = std::chrono::high_resolution_clock::now();

    for( int i = 0; i < iterations; ++i )
    {
        lua_getglobal( p_lua, "Update" );
        lua_pushinteger( p_lua, count );
        lua_pcall( p_lua, 1, 0, 0 );
    }

    auto                          end             = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end - start;
    double                        elapsed         = elapsed_seconds.count();

    printf( "Lua (Light Userdata) elapsed time: %fs\n", elapsed );

    lua_close( p_lua );
}


int main( int argc, char* argv[] )
{
    int32_t iterations = 1000;
    int32_t count      = 5000;

    p_transforms = new Transform[ count ];
    memset( p_transforms, 0, sizeof( Transform ) * count );

    c_perf_test( iterations, count );
    sol_perf_test( iterations, count );
    lightuserdata_perf_test( iterations, count );

    delete[] p_transforms;
    return 0;
}
c++ performance lua luajit lua-userdata
1个回答
0
投票

又过了几个小时,我发现了 ffi.cast,并编写了另一个结合 Light Userdata 和 ffi 的测试用例。现在的耗时已经非常接近原生 C++。对于性能非常关键的部分,这种做法也许对我有用,但我担心这样会牺牲安全性。也许有人对如何在不使用 ffi 的情况下提高性能有其他想法。

时间:

C++                  elapsed time: 0.001683s
Sol (Container)      elapsed time: 1.020745s
Lua (Light Userdata) elapsed time: 0.337135s
LuaJit ffi           elapsed time: 0.004741s

代码:

void ffi_perf_test( int32_t iterations, int32_t count )
{
    sol::state lua;
    lua.open_libraries( sol::lib::base, sol::lib::package, sol::lib::jit, sol::lib::ffi );
    lua_State* p_lua = lua.lua_state();

    create_transform_library( p_lua );

    int status = luaL_dostring( p_lua, R"(
        local ffi = require( "ffi" )

        ffi.cdef[[
            typedef struct Transform
            {
                float position_x;
                float position_y;
                float position_z;
                float scale_x;
                float scale_y;
                float scale_z;
            } Transform;
        ]]

        function Update( count )
            local transforms     = Transform:GetLightTransformArray()
            local ffi_transforms = ffi.cast( "Transform*", transforms )

            for i = 0, count-1, 1 do
                local position_x = ffi_transforms[i].position_x
                local scale_x    = ffi_transforms[i].scale_x

                position_x = position_x + 0.01
                scale_x    = scale_x + 0.01

                ffi_transforms[i].position_x = position_x
                ffi_transforms[i].scale_x    = scale_x
            end
        end
    )" );

    if( status != 0 )
    {
        printf( "Error: %s\n", lua_tostring( p_lua, -1 ) );
        return;
    }

    auto start = std::chrono::high_resolution_clock::now();

    for( int i = 0; i < iterations; ++i )
    {
        lua_getglobal( p_lua, "Update" );
        lua_pushinteger( p_lua, count );
        lua_pcall( p_lua, 1, 0, 0 );
    }

    auto                          end             = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_seconds = end - start;
    double                        elapsed         = elapsed_seconds.count();

    printf( "LuaJit ffi           elapsed time: %fs\n", elapsed );
}
© www.soinside.com 2019 - 2024. All rights reserved.