如何减少boost::interprocess::map中创建和插入的时间?

问题描述 投票:0回答:1

在下面的代码中,我将 msgpack 文件解包到 `std::map` 中。这个过程耗时将近 85 秒。

#include <chrono>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include <boost/filesystem.hpp>
#include <boost/variant.hpp>
#include <msgpack.hpp>

using namespace std::literals;
namespace fs = boost::filesystem;

// Tags for the kinds of value a message variant can hold. The numeric values
// are compared against boost::variant::which(), so the enumerator order must
// match the order of the variant's alternatives.
enum class TYPE_MSG : int
{
    NULLPTR_ = 0,
    INT64_ = 1,
    DOUBLE_ = 2,
    STRING_ = 3,
    VECTOR_ = 4,
    MAP_ = 5,
};

class MOVar;
// Storage for MOVar. The alternative order must stay in sync with TYPE_MSG,
// because the is_*() predicates compare which() against those enumerators.
typedef boost::variant<std::nullptr_t, int64_t, double, std::string, std::vector<MOVar>, std::map<std::string, MOVar>> MOVarST;

// Recursive message value: null, integer, double, string, array or string-keyed map.
class MOVar : public MOVarST
{
public:
    // A default-constructed value holds nullptr (is_nill() == true).
    MOVar() : MOVarST(nullptr) {}
    MOVar(double b) { MOVarST::operator=(b); }
    MOVar(int64_t b) { MOVarST::operator=(b); }
    // Plain ints are widened so that integer literals select the int64_t alternative.
    MOVar(int b) : MOVar(static_cast<int64_t>(b)) {}
    // `b` is a named rvalue reference, i.e. an lvalue inside the body: it has
    // to be moved explicitly, otherwise the string is copied.
    MOVar(std::string &&b) { MOVarST::operator=(std::move(b)); }
    // The containers are copied, so they are taken by const reference; this
    // also accepts const objects and temporaries.
    MOVar(const std::vector<MOVar> &b) { MOVarST::operator=(b); }
    MOVar(const std::map<std::string, MOVar> &b) { MOVarST::operator=(b); }

    const MOVar &operator=(const int64_t &b) { MOVarST::operator=(b); return *this; }
    const MOVar &operator=(std::string &&b) { MOVarST::operator=(std::move(b)); return *this; }
    // Assigning from an lvalue copies; the caller's string is left untouched.
    const MOVar &operator=(const std::string &b) { MOVarST::operator=(b); return *this; }
    const MOVar &operator=(const double &b) { MOVarST::operator=(b); return *this; }
    const MOVar &operator=(std::vector<MOVar> &&b) { MOVarST::operator=(std::move(b)); return *this; }
    const MOVar &operator=(std::map<std::string, MOVar> &&b) { MOVarST::operator=(std::move(b)); return *this; }

    // Type predicates based on the index of the active alternative.
    bool is_map() const { return which() == (int)TYPE_MSG::MAP_; }
    bool is_int64() const { return which() == (int)TYPE_MSG::INT64_; }
    bool is_nill() const { return which() == (int)TYPE_MSG::NULLPTR_; }
    bool is_double() const { return which() == (int)TYPE_MSG::DOUBLE_; }
    bool is_string() const { return which() == (int)TYPE_MSG::STRING_; }
    bool is_vector() const { return which() == (int)TYPE_MSG::VECTOR_; }

    // Checked accessors: boost::get throws boost::bad_get when the requested
    // alternative is not the active one.
    const double &_as_double() const { return boost::get<double>(*this); }
    const int64_t &_as_int64() const { return boost::get<int64_t>(*this); }
    const std::string &_as_string() const { return boost::get<std::string>(*this); }
    const std::vector<MOVar> &_as_vector() const { return boost::get<std::vector<MOVar>>(*this); }
    const std::map<std::string, MOVar> &_as_map() const { return boost::get<std::map<std::string, MOVar>>(*this); }

private:
};

// Fills `v` from the msgpack object `o`. Only the declaration is part of this
// snippet; the definition is not shown.
void convert_msgpack_to_movar(msgpack::object const &o, MOVar &v);

namespace msgpack
{
    MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS)
    {
        namespace adaptor
        {
            // Non-intrusive msgpack adaptor: lets object::convert()/as<>()
            // produce a MOVar by delegating to convert_msgpack_to_movar.
            template <>
            struct convert<MOVar>
            {
                msgpack::object const &operator()(msgpack::object const &source, MOVar &target) const
                {
                    convert_msgpack_to_movar(source, target);
                    return source;
                }
            };
        } // namespace adaptor
    }
} // namespace msgpack

// Loads big_map.msgpack into a std::map and prints how long reading plus
// conversion took. Returns 1 when the file cannot be opened or fully read.
int main()
{
    std::map<std::string, MOVar> map;
    const auto fileName = "big_map.msgpack"s;

    // steady_clock is monotonic, which is what an elapsed-time measurement needs.
    const auto startTime = std::chrono::steady_clock::now();
    {
        std::ifstream file(fileName, std::ios::binary);
        if (!file)
        {
            std::cerr << "Cannot open " << fileName << std::endl;
            return 1;
        }

        const auto fileSize = fs::file_size(fileName);
        std::vector<char> buffer(fileSize);
        // A short read would otherwise hand partially zero-filled data to msgpack.
        if (!file.read(buffer.data(), static_cast<std::streamsize>(buffer.size())))
        {
            std::cerr << "Cannot read " << fileName << std::endl;
            return 1;
        }

        msgpack::object_handle oh = msgpack::unpack(buffer.data(), buffer.size());
        msgpack::object deserialized = oh.get();
        deserialized.convert(map);
    }
    const auto endTime = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::seconds>(endTime - startTime);
    std::cout << "Duration: " << duration.count() << " seconds" << std::endl;
}


但是当我尝试用 `boost::interprocess::map` 替换 `std::map` 时,耗时几乎增加了 12 倍。是我哪里做错了吗?我使用 boost 共享内存的方式是否需要更换?(我在第二个进程中检查了两种 map 及其结果,两者的结果相同。)

#include <fstream>
#include <iostream>
#include <exception>
#include <msgpack.hpp>
#include <boost/variant.hpp>
#include <boost/filesystem.hpp>
#include <boost/interprocess/containers/map.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/allocators/allocator.hpp>

using namespace std::literals;
namespace fs = boost::filesystem;
namespace bip = boost::interprocess;

// Index of each alternative inside the message variants; compared against
// boost::variant::which(), so the order here must match the variant's
// alternative order (nullptr, int64, double, string, vector, map).
enum class TYPE_MSG : int
{
    NULLPTR_ = 0,
    INT64_,   // 1
    DOUBLE_,  // 2
    STRING_,  // 3
    VECTOR_,  // 4
    MAP_,     // 5
};

// Name of the shared-memory object; main() removes it again by this name.
std::string sharedMemoryName = "MySharedMemory";
// Requested segment size, roughly 9.8 GiB. The product is computed in double
// and converted to an integer here.
unsigned long long shmSize = static_cast<unsigned long long>(9.8 * 1024 * 1024 * 1024ull);
// Opened (or created) during static initialisation, before main() runs.
bip::managed_shared_memory segment(bip::open_or_create, sharedMemoryName.c_str(), shmSize);

// Allocator type that obtains memory from the segment's manager.
template <typename T>
using Alloc = bip::allocator<T, bip::managed_shared_memory::segment_manager>;
// Shared allocator instance, passed to the container constructors below.
// It must be defined after `segment`, which it refers to.
const Alloc<void> allocator(segment.get_segment_manager());

//MOVar class is like last example 

// Shared-memory counterparts of std::string / std::vector / std::map. They may
// be declared while MOVarBip is still incomplete.
class MOVarBip;
using STR = bip::basic_string<char, std::char_traits<char>, Alloc<char>>;
using PAIR = std::pair<const STR, MOVarBip>;
using MAP = bip::map<STR, MOVarBip, std::less<STR>, Alloc<PAIR>>;
using Vec = bip::vector<MOVarBip, Alloc<MOVarBip>>;

// Converts one std::map entry and inserts it into `map`. The declaration has
// to follow the aliases because its signature names MAP.
void Convertor(MAP &map, std::map<std::string, MOVar>::const_iterator &pair);


typedef boost::variant<std::nullptr_t, int64_t, double, STR, Vec, MAP> MOVarSTBIP;
class MOVarBip : public MOVarSTBIP
{
public:
    MOVarBip() : MOVarSTBIP(nullptr) {}
    MOVarBip(int64_t &b) { MOVarSTBIP::operator=(std::move(b)); }
    MOVarBip(double &b) { MOVarSTBIP::operator=(std::move(b)); }
    MOVarBip(STR &b) { MOVarSTBIP::operator=(std::move(b)); }
    MOVarBip(Vec &b) { MOVarSTBIP::operator=(std::move(b)); }
    MOVarBip(MAP &b) { MOVarSTBIP::operator=(std::move(b)); }

    const MOVarBip& operator=(int64_t&& b) { MOVarSTBIP::operator=(std::move(b)); return *this; }
    const MOVarBip& operator=(double&& b) { MOVarSTBIP::operator=(std::move(b)); return *this; }

    const MOVarBip& operator=(std::string&& b)
    {
        auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
        tmpValue = b.data();
        MOVarSTBIP::operator=(std::move(tmpValue));
        return *this;
    }

    const MOVarBip& operator=(std::vector<MOVar>&& value)
    {
        auto &vecBip = *segment.construct<Vec>(bip::anonymous_instance)(allocator);
        for (auto &item : value)
        {
            switch (item.which())
            {
            case static_cast<int>(TYPE_MSG::MAP_):
            {
                auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
                auto element = item._as_map().begin();
                auto mapEnd = item._as_map().end();
                for (; element != mapEnd; ++element)
                {
                    Convertor(mapBip, element);
                }
                MOVarBip valueBip = mapBip;
                vecBip.push_back(std::move(valueBip));
                break;
            }
            case static_cast<int>(TYPE_MSG::STRING_):
            {
                auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
                tmpValue = item._as_string().data();
                MOVarBip valueBip = tmpValue;
                vecBip.push_back(std::move(valueBip));
                break;
            }
            default:
            {
                throw std::logic_error("The code doesn't support this scenario for Vec type!");
            }
            }
        }

        MOVarSTBIP::operator=(std::move(vecBip));
        return *this;
    }

    const MOVarBip& operator=(std::map<std::string, MOVar>&& value)
    {
        auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
        auto itr = value.cbegin();
        auto endPoint = value.cend();
        for (; itr != endPoint; ++itr)
        {
            Convertor(mapBip, itr);
        }

        MOVarSTBIP::operator=(std::move(mapBip));
        return *this;
    }

    bool is_map() const { return which() == (int)TYPE_MSG::MAP_; }
    bool is_int64() const { return which() == (int)TYPE_MSG::INT64_; }
    bool is_nill() const { return which() == (int)TYPE_MSG::NULLPTR_; }
    bool is_double() const { return which() == (int)TYPE_MSG::DOUBLE_; }
    bool is_string() const { return which() == (int)TYPE_MSG::STRING_; }
    bool is_vector() const { return which() == (int)TYPE_MSG::VECTOR_; }

    const double &_as_double() const { return boost::get<double>(*this); }
    const int64_t &_as_int64() const { return boost::get<int64_t>(*this); }
    const STR &_as_string() const { return boost::get<STR>(*this); }
    const Vec &_as_vector() const { return boost::get<Vec>(*this); }
    const MAP &_as_map() const { return boost::get<MAP>(*this); }

private:
};


void Convertor(MAP &map, std::map<std::string, MOVar>::const_iterator &pair)
{
    auto &keyBip = *segment.construct<STR>(bip::anonymous_instance)(allocator);
    keyBip = pair->first.data();

    auto &value = pair->second;
    switch (value.which())
    {
    case static_cast<int>(TYPE_MSG::NULLPTR_):
    {
        auto &valueBip = *segment.construct<MOVarBip>(bip::anonymous_instance)();
        map.insert({std::move(keyBip), std::move(valueBip)});
        break;
    }
    case static_cast<int>(TYPE_MSG::INT64_):
    {
        auto &tmpValue = *segment.construct<int64_t>(bip::anonymous_instance)();
        tmpValue = value._as_int64();
        MOVarBip valueBip = tmpValue;
        map.insert({std::move(keyBip), std::move(valueBip)});
        break;
    }
    case static_cast<int>(TYPE_MSG::DOUBLE_):
    {
        auto &tmpValue = *segment.construct<double>(bip::anonymous_instance)();
        tmpValue = value._as_double();
        MOVarBip valueBip = tmpValue;
        map.insert({std::move(keyBip), std::move(valueBip)});
        break;
    }
    case static_cast<int>(TYPE_MSG::STRING_):
    {
        auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
        tmpValue = value._as_string().data();
        MOVarBip valueBip = tmpValue;
        map.insert({std::move(keyBip), std::move(valueBip)});
        break;
    }
    case static_cast<int>(TYPE_MSG::VECTOR_):
    {
        auto &vecBip = *segment.construct<Vec>(bip::anonymous_instance)(allocator);
        for (auto &item : value._as_vector())
        {
            switch (item.which())
            {
            case static_cast<int>(TYPE_MSG::MAP_):
            {
                auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
                auto element = item._as_map().begin();
                auto mapEnd = item._as_map().end();
                for (; element != mapEnd; ++element)
                {
                    Convertor(mapBip, element);
                }
                MOVarBip valueBip = mapBip;
                vecBip.push_back(std::move(valueBip));
                break;
            }
            case static_cast<int>(TYPE_MSG::STRING_):
            {
                auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
                tmpValue = item._as_string().data();
                MOVarBip valueBip = tmpValue;
                vecBip.push_back(std::move(valueBip));
                break;
            }
            default:
            {
                throw std::logic_error("The code doesn't support this scenario for Vec type!");
            }
            }
        }
        MOVarBip valueBip = vecBip;
        map.insert({std::move(keyBip), std::move(valueBip)});
        break;
    }
    case static_cast<int>(TYPE_MSG::MAP_):
    {
        auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
        auto itr = value._as_map().begin();
        auto endPoint = value._as_map().end();
        for (; itr != endPoint; ++itr)
        {
            Convertor(mapBip, itr);
        }
        MOVarBip valueBip = mapBip;
        map.insert({std::move(keyBip), std::move(valueBip)});
        break;
    }
    default:
    {
        throw std::logic_error("The code doesn't support this scenario!");
        break;
    }
    }
}

namespace msgpack
{
    MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS)
    {
        namespace adaptor
        {
            // msgpack STR/BIN -> shared-memory string; any other type is an error.
            template <>
            struct convert<STR>
            {
                msgpack::object const &operator()(msgpack::object const &o, STR &v) const
                {
                    switch (o.type)
                    {
                    case msgpack::type::BIN:
                        v.assign(o.via.bin.ptr, o.via.bin.size);
                        break;
                    case msgpack::type::STR:
                        v.assign(o.via.str.ptr, o.via.str.size);
                        break;
                    default:
                        throw msgpack::type_error();
                        break;
                    }
                    return o;
                }
            };

            // msgpack object -> MOVarBip. Arrays and maps are first unpacked
            // into std:: containers of MOVar and then converted by the
            // MOVarBip assignment operators.
            template <>
            struct convert<MOVarBip>
            {
                msgpack::object const &operator()(msgpack::object const &o, MOVarBip &v) const
                {
                    switch (o.type)
                    {
                    case msgpack::type::NIL:
                        v = MOVarBip();
                        break;
                    case msgpack::type::BOOLEAN:
                        v = static_cast<int64_t>(o.as<bool>());
                        break;
                    case msgpack::type::POSITIVE_INTEGER:
                    {
                        uint64_t temp = o.as<uint64_t>();
                        // Values that do not fit into int64_t are stored as decimal text.
                        if (temp > static_cast<uint64_t>(0x7FFFFFFFFFFFFFFF))
                        {
                            v = std::to_string(temp);
                        }
                        else
                        {
                            v = static_cast<int64_t>(temp);
                        }
                        break;
                    }
                    case msgpack::type::NEGATIVE_INTEGER:
                        v = (o.as<int64_t>());
                        break;
                    case msgpack::type::FLOAT32:
                        v = static_cast<double>(o.as<float>());
                        break;
                    case msgpack::type::FLOAT64:
                        v = (o.as<double>());
                        break;
                    case msgpack::type::STR:
                        v = o.as<std::string>();
                        break;
                    case msgpack::type::BIN:
                        v = o.as<std::string>();
                        break;
                    case msgpack::type::ARRAY:
                        v = o.as<std::vector<MOVar>>();
                        break;
                    case msgpack::type::MAP:
                        v = o.as<std::map<std::string, MOVar>>();
                        break;
                    case msgpack::type::EXT:
                        throw msgpack::type_error();
                        break;
                    }
                    return o;
                }
            };

            // msgpack MAP -> shared-memory map. The result is built in a local
            // map and moved into `v` at the end, so `v` is only replaced once
            // every entry has converted. The scratch map and keys are locals:
            // creating them as anonymous segment instances would leave one
            // unreachable object behind per map and per key.
            template <>
            struct convert<MAP>
            {
                msgpack::object const &operator()(msgpack::object const &o, MAP &v) const
                {
                    if (o.type != msgpack::type::MAP)
                    {
                        throw msgpack::type_error();
                    }
                    msgpack::object_kv *p(o.via.map.ptr);
                    msgpack::object_kv *const pend(o.via.map.ptr + o.via.map.size);
                    MAP tmp(allocator);
                    for (; p != pend; ++p)
                    {
                        STR key(allocator);
                        p->key.convert(key);
                        // operator[] means a repeated key overwrites the earlier value.
                        p->val.convert(tmp[std::move(key)]);
                    }
                    v = std::move(tmp);
                    return o;
                }
            };
        } // namespace adaptor
    }
} // namespace msgpack


// Loads big_map.msgpack into a named bip::map inside the shared-memory segment,
// prints the elapsed time and removes the shared-memory object again.
// Returns 1 when the input file cannot be opened or fully read.
int main()
{
    const auto fileName = "big_map.msgpack"s;
    int exitCode = 0;

    // The timing variables must be declared; steady_clock is monotonic and
    // therefore the right clock for measuring an interval.
    const auto startTime = std::chrono::steady_clock::now();
    {
        std::ifstream file(fileName, std::ios::binary);
        if (!file)
        {
            std::cerr << "Cannot open " << fileName << std::endl;
            exitCode = 1;
        }
        else
        {
            const auto fileSize = fs::file_size(fileName);
            std::vector<char> buffer(fileSize);
            // A short read would otherwise hand partially zero-filled data to msgpack.
            if (!file.read(buffer.data(), static_cast<std::streamsize>(buffer.size())))
            {
                std::cerr << "Cannot read " << fileName << std::endl;
                exitCode = 1;
            }
            else
            {
                auto &bip_map = *segment.construct<MAP>("bip_map")(allocator);
                msgpack::object_handle oh = msgpack::unpack(buffer.data(), buffer.size());
                msgpack::object deserialized = oh.get();
                deserialized.convert(bip_map);
            }
        }
    }
    const auto endTime = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::seconds>(endTime - startTime);
    std::cout << "Duration: " << duration.count() << " seconds" << std::endl;

    // Remove the shared-memory object on the error paths too.
    boost::interprocess::shared_memory_object::remove(sharedMemoryName.data());
    return exitCode;
}


这是代码的输出:

持续时间:72 秒(`std::map`)

持续时间:956 秒(`boost::interprocess::map`)

c++ boost shared-memory msgpack
1个回答
0
投票

您手动构造匿名实例,等于让分配器形同虚设。这不仅会导致内存泄漏(您的代码中有大量泄漏),而且效率也更低。

从 MOVar 到 MOVarBip 的所有手动编码转换都可以由单个函数处理,从而避免许多临时操作:

template <typename T> void BipConvert(T const& v, MOVarBip& result) {
    struct Vis {
        MOVarBip& result;

        void operator()(MOVar const& v) const { boost::apply_visitor(*this, v); }
        void operator()(std::nullptr_t) const { result = {}; }
        void operator()(int64_t v) const { result = v; }
        void operator()(std::string const& v) const {
            result = STR(v.c_str(), segment.get_segment_manager());
        }
        void operator()(std::map<std::string, MOVar> const& v) const {
            result = MAP(segment.get_segment_manager());

            for (auto& map = boost::get<MAP>(result); auto& [k, v] : v)
                BipConvert(v, map.emplace(k).first->second);
        }
        void operator()(std::vector<MOVar> const& v) const {
            result = Vec(segment.get_segment_manager());
            for (auto& vec = boost::get<Vec>(result); auto& item : v)
                BipConvert(item, vec.emplace_back());
        }
    };
    Vis{result}(v);
}

此外,随意地从可变左值引用中移动(move)正是导致未定义行为错误的根源,例如:

MOVarBip(STR &b) { MOVarSTBIP::operator=(std::move(b)); }

解决这个问题的最简单方法是按值获取,无论参数的值类别如何,它都会做正确的事情。无论如何,如果您打算支持不同的段,那么除非分配器相等,否则您不应该移动。

简化

反转上述观察,我发现您确实希望有一个引用全局段的全局静态分配器。比如:

// Plain Boost.Interprocess allocator over the managed segment.
template <typename T>
using BaseAlloc = bip::allocator<T, bip::managed_shared_memory::segment_manager>;

// Allocator that is default-constructible: when no segment manager is given it
// uses the one of the global `segment`. Containers using it can therefore be
// created without passing an allocator explicitly.
template <typename T>
struct SegmentAlloc : BaseAlloc<T> {
    // Rebinding must yield SegmentAlloc again, not the plain base allocator.
    template <typename U> struct rebind {
        typedef SegmentAlloc<U> other;
    };

    // Keep every constructor of the underlying allocator available.
    using BaseAlloc<T>::BaseAlloc;

    SegmentAlloc(bip::managed_shared_memory::segment_manager* manager = segment->get_segment_manager())
        : BaseAlloc<T>(manager) {}
};

现在您可以定义变量类型:

struct MOVarBip;
using STR  = bip::basic_string<char, std::char_traits<char>, SegmentAlloc<char>>;
using PAIR = std::pair<const STR, MOVarBip>;
using VEC  = bip::vector<MOVarBip, SegmentAlloc<MOVarBip>>;
using MAP  = bip::map<STR, MOVarBip, std::less<STR>, SegmentAlloc<PAIR>>;
static void json_dump(MAP const& data, std::string filename);

using MOVarSTBIP = boost::variant<std::nullptr_t, int64_t, double, STR, VEC, MAP>;

struct MOVarBip : MOVarSTBIP {
    using Base = MOVarSTBIP;

    template <typename... Args> explicit MOVarBip(Args&&... args) : Base(std::forward<Args>(args)...) {}
    using Base::operator=;

    explicit MOVarBip(std::string_view s) : Base(STR(s.data(), s.size())) {}
    explicit MOVarBip(std::string s) : Base(STR(s.data(), s.size())) {}

    bool is_map()    const { return which() == TYPE_MSG::MAP_;     }
    bool is_int64()  const { return which() == TYPE_MSG::INT64_;   }
    bool is_nill()   const { return which() == TYPE_MSG::NULLPTR_; }
    bool is_double() const { return which() == TYPE_MSG::DOUBLE_;  }
    bool is_string() const { return which() == TYPE_MSG::STRING_;  }
    bool is_vector() const { return which() == TYPE_MSG::VECTOR_;  }

    double  const& _as_double() const { return boost::get<double>(*this);  }
    int64_t const& _as_int64()  const { return boost::get<int64_t>(*this); }
    STR     const& _as_string() const { return boost::get<STR>(*this);     }
    VEC     const& _as_vector() const { return boost::get<VEC>(*this);     }
    MAP     const& _as_map()    const { return boost::get<MAP>(*this);     }

    double  & _as_double() { return boost::get<double>(*this);  }
    int64_t & _as_int64()  { return boost::get<int64_t>(*this); }
    STR     & _as_string() { return boost::get<STR>(*this);     }
    VEC     & _as_vector() { return boost::get<VEC>(*this);     }
    MAP     & _as_map()    { return boost::get<MAP>(*this);     }
};

无论如何,我总是更喜欢非侵入性的 msgpack 适配,而这里正是它的全部荣耀:

namespace msgpack {
    MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
        namespace adaptor {
            // msgpack STR/BIN -> shared-memory string; any other type is an error.
            template <> struct convert<STR> {
                msgpack::object const& operator()(msgpack::object const& o, STR& v) const {
                    switch (o.type) {
                        case msgpack::type::BIN: v.assign(o.via.bin.ptr, o.via.bin.size); break;
                        case msgpack::type::STR: v.assign(o.via.str.ptr, o.via.str.size); break;
                        default: throw msgpack::type_error(); break;
                    }
                    return o;
                }
            };

            // msgpack object -> MOVarBip, dispatching on the wire type.
            template <> struct convert<MOVarBip> {
                msgpack::object const& operator()(msgpack::object const& o, MOVarBip& v) const {
                    switch (o.type) {
                        case msgpack::type::NIL: v = MOVarBip(); break;
                        case msgpack::type::BOOLEAN: v = static_cast<int64_t>(o.as<bool>()); break;
                        case msgpack::type::POSITIVE_INTEGER:
                            // Values that do not fit into int64_t are stored as decimal text.
                            if (uint64_t temp = o.as<uint64_t>(); temp > 0x7FFFFFFFFFFFFFFF)
                                v = MOVarBip(std::to_string(temp));
                            else
                                v = static_cast<int64_t>(temp);
                            break;
                        case msgpack::type::NEGATIVE_INTEGER: v = o.as<int64_t>(); break;
                        case msgpack::type::FLOAT32:
                        case msgpack::type::FLOAT64: v = o.as<double>(); break;
                        case msgpack::type::STR:
                        case msgpack::type::BIN: v = o.as<STR>(); break;
                        case msgpack::type::ARRAY: v = o.as<VEC>(); break;
                        case msgpack::type::MAP: v = o.as<MAP>(); break;
                        case msgpack::type::EXT: throw msgpack::type_error(); break;
                    }
                    return o;
                }
            };

            // msgpack MAP -> shared-memory map. Replaces the previous contents of `m`.
            template <> struct convert<MAP> {
                msgpack::object const& operator()(msgpack::object const& o, MAP& m) const {
                    if (o.type != msgpack::type::MAP)
                        throw msgpack::type_error();

                    m.clear();
                    for (auto p = o.via.map.ptr, pend = p + o.via.map.size; p != pend; ++p) {
                        // Insert an empty value, then convert into it in place.
                        auto [it, uniqueOk] = m.emplace(p->key.as<STR>(), MOVarBip{});
                        // Keys are expected to be unique. When asserts are
                        // compiled out, a repeated key overwrites the earlier value.
                        assert(uniqueOk);

                        p->val.convert(it->second);
                    }
                    return o;
                }
            };

            // msgpack ARRAY -> shared-memory vector. Replaces the previous
            // contents of `v`, consistent with convert<MAP>.
            template <> struct convert<VEC> {
                msgpack::object const& operator()(msgpack::object const& o, VEC& v) const {
                    if (o.type != msgpack::type::ARRAY)
                        throw msgpack::type_error();

                    v.clear();
                    v.reserve(o.via.array.size); // one allocation instead of repeated growth
                    for (auto p = o.via.array.ptr, pend = p + o.via.array.size; p != pend; ++p)
                        p->convert(v.emplace_back());

                    return o;
                }
            };
        } // namespace adaptor
    }
} // namespace msgpack

我认为它可以更加优化,但我希望这是简化的良好开端。

完整演示

实时编译器资源管理器

// #define MSGPACK_USE_BOOST
#include <iostream>
#include <msgpack.hpp>

#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/containers/map.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/variant.hpp>
using namespace std::literals;
namespace bip = boost::interprocess;

// Index of each variant alternative. Left unscoped (note the commented-out
// `class`) so the enumerators compare directly against the int returned by
// boost::variant::which().
enum /*class*/ TYPE_MSG : int {
    NULLPTR_ = 0,
    INT64_   = 1,
    DOUBLE_  = 2,
    STRING_  = 3,
    VECTOR_  = 4,
    MAP_     = 5
};

// Name of the shared-memory object; main() removes it again by this name.
static auto sharedMemoryName = "MySharedMemory"s;
// Requested segment size, roughly 9.8 GiB. With plain `auto` the product
// deduces to double and is narrowed implicitly at the constructor call; the
// conversion is made explicit here so the variable is an integer byte count.
static auto shmSize          = static_cast<unsigned long long>(9.8 * 1024 * 1024 * 1024ull);
// Held through a unique_ptr so main() can close the mapping (reset()) before
// removing the shared-memory object.
static auto segment          = std::make_unique<bip::managed_shared_memory>(//
    bip::open_or_create, sharedMemoryName.data(), shmSize);

// Plain Boost.Interprocess allocator over the managed segment.
template <typename T>
using BaseAlloc = bip::allocator<T, bip::managed_shared_memory::segment_manager>;

// Allocator that is default-constructible: when no segment manager is given it
// uses the one of the global `segment`. Containers using it can therefore be
// created without passing an allocator explicitly.
template <typename T>
struct SegmentAlloc : BaseAlloc<T> {
    // Rebinding must yield SegmentAlloc again, not the plain base allocator.
    template <typename U> struct rebind {
        typedef SegmentAlloc<U> other;
    };

    // Keep every constructor of the underlying allocator available.
    using BaseAlloc<T>::BaseAlloc;

    SegmentAlloc(bip::managed_shared_memory::segment_manager* manager = segment->get_segment_manager())
        : BaseAlloc<T>(manager) {}
};

struct MOVarBip;
using STR  = bip::basic_string<char, std::char_traits<char>, SegmentAlloc<char>>;
using PAIR = std::pair<const STR, MOVarBip>;
using VEC  = bip::vector<MOVarBip, SegmentAlloc<MOVarBip>>;
using MAP  = bip::map<STR, MOVarBip, std::less<STR>, SegmentAlloc<PAIR>>;
static void json_dump(MAP const& data, std::string filename);

using MOVarSTBIP = boost::variant<std::nullptr_t, int64_t, double, STR, VEC, MAP>;

struct MOVarBip : MOVarSTBIP {
    using Base = MOVarSTBIP;

    template <typename... Args> explicit MOVarBip(Args&&... args) : Base(std::forward<Args>(args)...) {}
    using Base::operator=;

    explicit MOVarBip(std::string_view s) : Base(STR(s.data(), s.size())) {}
    explicit MOVarBip(std::string s) : Base(STR(s.data(), s.size())) {}

    bool is_map()    const { return which() == TYPE_MSG::MAP_;     }
    bool is_int64()  const { return which() == TYPE_MSG::INT64_;   }
    bool is_nill()   const { return which() == TYPE_MSG::NULLPTR_; }
    bool is_double() const { return which() == TYPE_MSG::DOUBLE_;  }
    bool is_string() const { return which() == TYPE_MSG::STRING_;  }
    bool is_vector() const { return which() == TYPE_MSG::VECTOR_;  }

    double  const& _as_double() const { return boost::get<double>(*this);  }
    int64_t const& _as_int64()  const { return boost::get<int64_t>(*this); }
    STR     const& _as_string() const { return boost::get<STR>(*this);     }
    VEC     const& _as_vector() const { return boost::get<VEC>(*this);     }
    MAP     const& _as_map()    const { return boost::get<MAP>(*this);     }

    double  & _as_double() { return boost::get<double>(*this);  }
    int64_t & _as_int64()  { return boost::get<int64_t>(*this); }
    STR     & _as_string() { return boost::get<STR>(*this);     }
    VEC     & _as_vector() { return boost::get<VEC>(*this);     }
    MAP     & _as_map()    { return boost::get<MAP>(*this);     }
};

namespace msgpack {
    MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
        namespace adaptor {
            // msgpack STR/BIN -> shared-memory string; any other type is an error.
            template <> struct convert<STR> {
                msgpack::object const& operator()(msgpack::object const& o, STR& v) const {
                    switch (o.type) {
                        case msgpack::type::BIN: v.assign(o.via.bin.ptr, o.via.bin.size); break;
                        case msgpack::type::STR: v.assign(o.via.str.ptr, o.via.str.size); break;
                        default: throw msgpack::type_error(); break;
                    }
                    return o;
                }
            };

            // msgpack object -> MOVarBip, dispatching on the wire type.
            template <> struct convert<MOVarBip> {
                msgpack::object const& operator()(msgpack::object const& o, MOVarBip& v) const {
                    switch (o.type) {
                        case msgpack::type::NIL: v = MOVarBip(); break;
                        case msgpack::type::BOOLEAN: v = static_cast<int64_t>(o.as<bool>()); break;
                        case msgpack::type::POSITIVE_INTEGER:
                            // Values that do not fit into int64_t are stored as decimal text.
                            if (uint64_t temp = o.as<uint64_t>(); temp > 0x7FFFFFFFFFFFFFFF)
                                v = MOVarBip(std::to_string(temp));
                            else
                                v = static_cast<int64_t>(temp);
                            break;
                        case msgpack::type::NEGATIVE_INTEGER: v = o.as<int64_t>(); break;
                        case msgpack::type::FLOAT32:
                        case msgpack::type::FLOAT64: v = o.as<double>(); break;
                        case msgpack::type::STR:
                        case msgpack::type::BIN: v = o.as<STR>(); break;
                        case msgpack::type::ARRAY: v = o.as<VEC>(); break;
                        case msgpack::type::MAP: v = o.as<MAP>(); break;
                        case msgpack::type::EXT: throw msgpack::type_error(); break;
                    }
                    return o;
                }
            };

            // msgpack MAP -> shared-memory map. Replaces the previous contents of `m`.
            template <> struct convert<MAP> {
                msgpack::object const& operator()(msgpack::object const& o, MAP& m) const {
                    if (o.type != msgpack::type::MAP)
                        throw msgpack::type_error();

                    m.clear();
                    for (auto p = o.via.map.ptr, pend = p + o.via.map.size; p != pend; ++p) {
                        // Insert an empty value, then convert into it in place.
                        auto [it, uniqueOk] = m.emplace(p->key.as<STR>(), MOVarBip{});
                        // Keys are expected to be unique. When asserts are
                        // compiled out, a repeated key overwrites the earlier value.
                        assert(uniqueOk);

                        p->val.convert(it->second);
                    }
                    return o;
                }
            };

            // msgpack ARRAY -> shared-memory vector. Replaces the previous
            // contents of `v`, consistent with convert<MAP>.
            template <> struct convert<VEC> {
                msgpack::object const& operator()(msgpack::object const& o, VEC& v) const {
                    if (o.type != msgpack::type::ARRAY)
                        throw msgpack::type_error();

                    v.clear();
                    v.reserve(o.via.array.size); // one allocation instead of repeated growth
                    for (auto p = o.via.array.ptr, pend = p + o.via.array.size; p != pend; ++p)
                        p->convert(v.emplace_back());

                    return o;
                }
            };
        } // namespace adaptor
    }
} // namespace msgpack

#include <chrono>
#include <fstream>
// Clock used for timing. steady_clock is monotonic, so the measured interval
// cannot be distorted by wall-clock adjustments; high_resolution_clock gives
// no such guarantee.
static constexpr auto now = std::chrono::steady_clock::now;

// Reads the msgpack file `filename`, converts its top-level map into `into`
// and prints the elapsed time in milliseconds.
static void timed_load(std::string filename, MAP& into) {
    auto startTime = now();

    std::ifstream           file(filename, std::ios::binary);
    // Slurp the whole file. If it cannot be opened the buffer stays empty and
    // the failure surfaces from msgpack::unpack below.
    std::vector<char> const buffer(std::istreambuf_iterator<char>(file), {});

    // The handle owns the unpacked object tree and must outlive `deserialized`.
    msgpack::object_handle oh           = msgpack::unpack(buffer.data(), buffer.size());
    msgpack::object const& deserialized = oh.get();

    deserialized.convert(into);

    std::cout << "Duration: " << (now() - startTime) / 1ms << "ms" << std::endl;
}

int main() {
    {
        auto& bip_map = *segment->find_or_construct<MAP>("bip_map")();

        timed_load("big_map.msgpack", bip_map);

        json_dump(bip_map, "big_map.json");
    }

    segment.reset(); // close before remove
    bip::shared_memory_object::remove(sharedMemoryName.c_str());
}

#include <boost/json.hpp>

// Writes `data` to `filename` as JSON. Used to check that the shared-memory
// structure is accessible and holds the expected content.
void json_dump(MAP const& data, std::string filename) {
    struct Vis {
        using value       = boost::json::value;
        using string_view = boost::json::string_view;

        value operator()(MOVarBip const& var) const { return boost::apply_visitor(*this, var); }
        value operator()(std::nullptr_t) const { return nullptr; }
        value operator()(int64_t i) const { return i; }
        value operator()(double d) const { return d; }
        // Length-aware: c_str() would cut the text at an embedded '\0'.
        value operator()(STR const& s) const { return string_view(s.data(), s.size()); }
        value operator()(VEC const& v) const {
            boost::json::array arr;
            arr.reserve(v.size());
            for (auto& el : v)
                arr.push_back((*this)(el));
            return arr;
        }
        value operator()(MAP const& m) const {
            boost::json::object obj;
            obj.reserve(m.size());
            for (auto& [k, v] : m)
                obj[string_view(k.data(), k.size())] = (*this)(v);
            return obj;
        }
    };

    std::ofstream(filename) << Vis{}(data);
}

我已经确认它在 ASAN/UBSAN 下是干净的,并且没有泄漏。正如您所看到的,我顺便加入了一个 JSON 转储,只是为了验证数据可访问且有效。

在我的机器上:

总结

我想这可以作为进一步优化的良好基础,例如结合您已经发现的 `reserve()`。

© www.soinside.com 2019 - 2024. All rights reserved.