问题: 在 Lua 中遍历结构体数组并修改其中的数据,这些数据稍后会在 C++ 中使用。
背景故事: 这周我做了一些性能测试,结果对 Lua 的性能有点失望。事情起因于我把 Lua 作为脚本接口集成到自己的游戏引擎中。为了简单起见,我一开始使用的是 luabridge,但由于 sol2 提供了一些便利的功能,我很快就切换到了 sol2。之后我第一次测量了性能,结果糟糕得让我震惊。
测试用例: 我提取了一个独立的测试用例(见下方的代码),用来比较原生 C++ 与 sol2 的性能,结果同样糟糕。随后我又添加了一个测试用例,直接使用 Lua C API 和 Light Userdata 来执行相同的操作。性能好了一些,但仍然远远不够,如下所示。
时间:
C++ elapsed time: 0.002736s
Sol (Container) elapsed time: 0.999166s
Lua (Light Userdata) elapsed time: 0.338946s
问题: 对于这样的用例,这种结果是意料之中的吗?还是说有机会接近原生 C++ 的性能?
信息:
代码:
#define SOL_ALL_SAFETIES_ON 0
#define SOL_USING_CXX_LUAJIT 1
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

#include <sol/sol.hpp>
// Plain aggregate of six floats that every benchmark in this file reads and
// writes. The LuaJIT FFI test declares a struct with the same six float fields
// in the same order through ffi.cdef, so keep both declarations in sync.
struct Transform
{
// Position components.
float position_x;
float position_y;
float position_z;
// Scale components.
float scale_x;
float scale_y;
float scale_z;
};
// Base pointer of the Transform array shared by all benchmarks in this file.
// It is null until main() points it at the array, before any test runs.
Transform* p_transforms = nullptr;
std::vector<Transform*> GetTransformPointerArray( int32_t count )
{
std::vector<Transform*> transform_pointers( count );
for( int i = 0; i < transform_pointers.size(); ++i )
transform_pointers[ i ] = &p_transforms[ i ];
return transform_pointers;
}
void c_Update( int32_t count )
{
for( int i = 0; i < count; ++i )
{
Transform* p_transform = &p_transforms[ i ];
p_transform->position_x += 0.01f;
p_transform->scale_x += 0.01f;
}
}
/// Calls c_Update( count ) `iterations` times and prints the total elapsed
/// time in seconds to stdout.
///
/// @param iterations Number of c_Update calls to time.
/// @param count      Element count forwarded to c_Update.
void c_perf_test( int32_t iterations, int32_t count )
{
    // steady_clock is monotonic; high_resolution_clock may be an alias of
    // system_clock, which can be adjusted while the measurement is running.
    using clock = std::chrono::steady_clock;

    const auto start = clock::now();
    for( int32_t i = 0; i < iterations; ++i )
        c_Update( count );
    const std::chrono::duration<double> elapsed_seconds = clock::now() - start;

    printf( "C++ elapsed time: %fs\n", elapsed_seconds.count() );
}
/// Runs the same update through sol2: registers Transform as a usertype,
/// defines a Lua `Update( transforms )` function, then times `iterations`
/// calls of it with a vector of pointers to the first `count` transforms.
/// Prints the total elapsed time in seconds to stdout.
///
/// @param iterations Number of Lua Update calls to time.
/// @param count      Number of transforms handed to each call.
void sol_perf_test( int32_t iterations, int32_t count )
{
    sol::state lua;
    lua.open_libraries();
    lua.new_usertype<Transform>( "Transform",
        "position_x", &Transform::position_x,
        "position_y", &Transform::position_y,
        "position_z", &Transform::position_z,
        "scale_x", &Transform::scale_x,
        "scale_y", &Transform::scale_y,
        "scale_z", &Transform::scale_z );
    lua.script( R"(
function Update( transforms )
for i = 1, #transforms, 1 do
local transform = transforms[i]
local position_x = transform.position_x
local scale_x = transform.scale_x
position_x = position_x + 0.01
scale_x = scale_x + 0.01
transform.position_x = position_x
transform.scale_x = scale_x
end
end
)" );
    sol::function update_func = lua[ "Update" ];
    std::vector<Transform*> transform_pointers = GetTransformPointerArray( count );

    // steady_clock is monotonic; high_resolution_clock may be an alias of
    // system_clock, which can be adjusted while the measurement is running.
    using clock = std::chrono::steady_clock;

    const auto start = clock::now();
    // NOTE(review): the vector is handed over as an lvalue on every call;
    // sol2 may copy containers passed this way -- confirm against the sol2
    // container documentation whether std::ref( transform_pointers ) is
    // the intended way to avoid that.
    for( int32_t i = 0; i < iterations; ++i )
        update_func( transform_pointers );
    const std::chrono::duration<double> elapsed_seconds = clock::now() - start;

    printf( "Sol (Container) elapsed time: %fs\n", elapsed_seconds.count() );
}
/// Lua C function: pushes the global `p_transforms` base pointer as a light
/// userdata and returns it as the single result.
static int get_light_transform_array( lua_State* L )
{
    void* const array_base = p_transforms;
    lua_pushlightuserdata( L, array_base );
    return 1; // one result on the Lua stack
}
/// Lua C function: returns a light userdata pointing at one element of a
/// Transform array.
///
/// Stack slot 2 holds the array base pointer and slot 3 the 1-based element
/// index. The Lua scripts in this file call it with ':' syntax, so slot 1 is
/// the library table. No bounds or null checks are performed.
static int get_light_transform( lua_State* L )
{
    Transform* const array_base = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    const int lua_index = luaL_checkint( L, 3 );
    // Lua indices start at 1, the C array at 0.
    Transform* const element = array_base + ( lua_index - 1 );
    lua_pushlightuserdata( L, element );
    return 1;
}
/// Lua C function: reads position_x from the Transform whose address is the
/// light userdata in stack slot 2 and returns it as a Lua number.
static int get_position_x( lua_State* L )
{
    const Transform* const source = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    lua_pushnumber( L, source->position_x );
    return 1;
}
/// Lua C function: stores the number in stack slot 3 into position_x of the
/// Transform whose address is the light userdata in stack slot 2.
/// Returns nothing to Lua.
static int set_position_x( lua_State* L )
{
    Transform* const target = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    // Lua numbers are narrowed to the float field.
    target->position_x = static_cast<float>( lua_tonumber( L, 3 ) );
    return 0;
}
/// Lua C function: reads scale_x from the Transform whose address is the
/// light userdata in stack slot 2 and returns it as a Lua number.
static int get_scale_x( lua_State* L )
{
    const Transform* const source = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    lua_pushnumber( L, source->scale_x );
    return 1;
}
/// Lua C function: stores the number in stack slot 3 into scale_x of the
/// Transform whose address is the light userdata in stack slot 2.
/// Returns nothing to Lua.
static int set_scale_x( lua_State* L )
{
    Transform* const target = static_cast<Transform*>( lua_touserdata( L, 2 ) );
    // Lua numbers are narrowed to the float field.
    target->scale_x = static_cast<float>( lua_tonumber( L, 3 ) );
    return 0;
}
/// Registers the light-userdata accessor functions of this file under the
/// library name "Transform" in the given Lua state.
static void create_transform_library( lua_State* L )
{
    // Name -> C function table; the null pair terminates the list.
    static const luaL_Reg kTransformFunctions[] = {
        { "GetLightTransformArray", get_light_transform_array },
        { "GetLightTransform", get_light_transform },
        { "GetPositionX", get_position_x },
        { "SetPositionX", set_position_x },
        { "GetScaleX", get_scale_x },
        { "SetScaleX", set_scale_x },
        { nullptr, nullptr }
    };
    luaL_openlib( L, "Transform", kTransformFunctions, 0 );
}
void lightuserdata_perf_test( int32_t iterations, int32_t count )
{
lua_State* p_lua = luaL_newstate();
luaL_openlibs( p_lua );
create_transform_library( p_lua );
int status = luaL_dostring( p_lua, R"(
function Update( count )
local transforms = Transform:GetLightTransformArray()
for i = 1, count, 1 do
local light_transform = Transform:GetLightTransform( transforms, i )
local position_x = Transform:GetPositionX( light_transform )
local scale_x = Transform:GetScaleX( light_transform )
position_x = position_x + 0.01
scale_x = scale_x + 0.01
Transform:SetPositionX( light_transform, position_x )
Transform:SetScaleX( light_transform, scale_x )
end
end
)" );
if( status != 0 )
{
printf( "Error: %s\n", lua_tostring( p_lua, -1 ) );
return;
}
auto start = std::chrono::high_resolution_clock::now();
for( int i = 0; i < iterations; ++i )
{
lua_getglobal( p_lua, "Update" );
lua_pushinteger( p_lua, count );
lua_pcall( p_lua, 1, 0, 0 );
}
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start;
double elapsed = elapsed_seconds.count();
printf( "Lua (Light Userdata) elapsed time: %fs\n", elapsed );
lua_close( p_lua );
}
int main( int argc, char* argv[] )
{
int32_t iterations = 1000;
int32_t count = 5000;
p_transforms = new Transform[ count ];
memset( p_transforms, 0, sizeof( Transform ) * count );
c_perf_test( iterations, count );
sol_perf_test( iterations, count );
lightuserdata_perf_test( iterations, count );
delete[] p_transforms;
return 0;
}
又过了几个小时,我发现了 ffi.cast,并编写了另一个结合 Light Userdata 和 ffi 的测试用例。现在的耗时已经非常接近原生 C++。对于性能非常关键的部分,这种做法可能对我有用,但我担心这样会牺牲安全性。也许有人有其他想法,能在不使用 ffi 的情况下提高性能。
时间:
C++ elapsed time: 0.001683s
Sol (Container) elapsed time: 1.020745s
Lua (Light Userdata) elapsed time: 0.337135s
LuaJit ffi elapsed time: 0.004741s
代码:
void ffi_perf_test( int32_t iterations, int32_t count )
{
sol::state lua;
lua.open_libraries( sol::lib::base, sol::lib::package, sol::lib::jit, sol::lib::ffi );
lua_State* p_lua = lua.lua_state();
create_transform_library( p_lua );
int status = luaL_dostring( p_lua, R"(
local ffi = require( "ffi" )
ffi.cdef[[
typedef struct Transform
{
float position_x;
float position_y;
float position_z;
float scale_x;
float scale_y;
float scale_z;
} Transform;
]]
function Update( count )
local transforms = Transform:GetLightTransformArray()
local ffi_transforms = ffi.cast( "Transform*", transforms )
for i = 0, count-1, 1 do
local position_x = ffi_transforms[i].position_x
local scale_x = ffi_transforms[i].scale_x
position_x = position_x + 0.01
scale_x = scale_x + 0.01
ffi_transforms[i].position_x = position_x
ffi_transforms[i].scale_x = scale_x
end
end
)" );
if( status != 0 )
{
printf( "Error: %s\n", lua_tostring( p_lua, -1 ) );
return;
}
auto start = std::chrono::high_resolution_clock::now();
for( int i = 0; i < iterations; ++i )
{
lua_getglobal( p_lua, "Update" );
lua_pushinteger( p_lua, count );
lua_pcall( p_lua, 1, 0, 0 );
}
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start;
double elapsed = elapsed_seconds.count();
printf( "LuaJit ffi elapsed time: %fs\n", elapsed );
}