假设我在 python 中有这个字典,在模块级别定义(
mysettings.py
):
# NOTE: each expensive_to_compute(...) call below runs as soon as this dict
# literal is evaluated, not when the corresponding key is later read.
settings = {
'expensive1' : expensive_to_compute(1),
'expensive2' : expensive_to_compute(2),
...
}
我希望在访问键时计算这些值:
from mysettings import settings  # settings is only "prepared"
# print() as a function call is valid on Python 3 (and on Python 2 for a
# single argument), matching the other snippets that use print(...).
print(settings['expensive1'])  # Now the value is really computed.
这可能吗?怎么办?
不要继承内置字典。即使您覆盖
dict.__getitem__()
方法,dict.get()
也不会按您的预期工作。
正确的方法是继承
collections.abc.Mapping
。
from collections.abc import Mapping
class LazyDict(Mapping):
    """Read-only mapping whose values are computed on every lookup.

    Every stored value must be a ``(func, arg)`` pair.  Reading a key
    returns ``func(arg)``; the result is not cached, so ``func`` runs again
    on each access.  Because the class derives from ``Mapping``, the
    inherited ``get``, ``items``, ``values`` and ``in`` all go through
    ``__getitem__`` and therefore see the computed values.
    """

    def __init__(self, *args, **kw):
        """Accept the same arguments as ``dict()``."""
        self._raw_dict = dict(*args, **kw)

    def __getitem__(self, key):
        """Return ``func(arg)`` for the pair stored under *key*.

        Raises:
            KeyError: if *key* is not present.
        """
        func, arg = self._raw_dict[key]
        return func(arg)

    def __iter__(self):
        """Iterate over the keys without evaluating any value."""
        return iter(self._raw_dict)

    def __len__(self):
        """Return the number of stored keys."""
        return len(self._raw_dict)
然后你可以这样做:
settings = LazyDict({
'expensive1': (expensive_to_compute, 1),
'expensive2': (expensive_to_compute, 2),
})
我还在这里列出了示例代码和示例:https://gist.github.com/gyli/9b50bb8537069b4e154fec41a4b5995a
如果不把参数与可调用对象分开存放,我认为这是做不到的。但是,下面这样应该可行:
class MySettingsDict(dict):
    """dict of ``(function, arg)`` pairs evaluated on subscript access.

    Only ``settings[key]`` triggers evaluation; inherited methods such as
    ``get``, ``values`` and ``items`` bypass ``__getitem__`` and return the
    raw pairs.  Results are not cached.
    """

    def __getitem__(self, item):
        """Return ``function(arg)`` for the pair stored under *item*.

        Raises:
            KeyError: if *item* is not present.
        """
        function, arg = dict.__getitem__(self, item)
        return function(arg)
def expensive_to_compute(arg):
    """Stand-in for a costly computation: return *arg* multiplied by 3."""
    return arg * 3
现在:
>>> settings = MySettingsDict({
'expensive1': (expensive_to_compute, 1),
'expensive2': (expensive_to_compute, 2),
})
>>> settings['expensive1']
3
>>> settings['expensive2']
6
编辑:
如果要多次访问
expensive_to_compute
的结果,您可能还想缓存它们。像这样的东西
class MySettingsDict(dict):
    """dict of ``(function, arg)`` pairs evaluated and cached on first read.

    The first ``settings[key]`` access calls ``function(arg)`` and stores
    the result back under the same key, so later reads return the cached
    value.  Inherited methods such as ``get`` and ``values`` bypass
    ``__getitem__`` and return whatever is currently stored.
    """

    def __getitem__(self, item):
        """Return the value for *item*, computing and caching it if needed.

        Raises:
            KeyError: if *item* is not present.
        """
        value = dict.__getitem__(self, item)
        # Recognise an unevaluated entry by its shape instead of assuming
        # every computed result is an int: with an int check, any other
        # result type (str, float, list, ...) would be unpacked again on the
        # next read.  A computed result that is itself a (callable, arg)
        # pair would still be re-evaluated.
        is_pending = (
            isinstance(value, (tuple, list))
            and len(value) == 2
            and callable(value[0])
        )
        if is_pending:
            function, arg = value
            value = function(arg)
            dict.__setitem__(self, item, value)
        return value
现在:
>>> settings.values()
dict_values([(<function expensive_to_compute at 0x9b0a62c>, 2),
(<function expensive_to_compute at 0x9b0a62c>, 1)])
>>> settings['expensive1']
3
>>> settings.values()
dict_values([(<function expensive_to_compute at 0x9b0a62c>, 2), 3])
您可能还想重写其他
dict
方法,具体取决于您想要如何使用字典。
将对函数的引用存储为键的值,即:
def A():
    """Stand-in for a slow computation; returns a fixed message."""
    return "that took ages"
def B():
    """Stand-in for a slow computation; returns a fixed message."""
    return "that took for-ever"
settings = {
"A": A,
"B": B,
}
print(settings["A"]())
这样,您仅在访问并调用它时评估与键关联的函数。可以处理非惰性值的合适类是:
import types
class LazyDict(dict):
    """dict that calls stored plain functions on subscript access.

    Values that are ``types.FunctionType`` instances (``def`` functions and
    lambdas) are called with no arguments each time they are read; every
    other value is returned unchanged.  Other callables (built-ins,
    ``functools.partial`` objects, bound methods, classes) are NOT
    ``FunctionType`` and are therefore returned as-is.  Results are not
    cached, and inherited methods such as ``get`` bypass ``__getitem__``.
    """

    def __getitem__(self, key):
        """Return the value for *key*, calling it first if it is a function.

        Raises:
            KeyError: if *key* is not present.
        """
        item = dict.__getitem__(self, key)
        if isinstance(item, types.FunctionType):
            return item()
        return item
用途:
settings = LazyDict([("A",A),("B",B)])
print(settings["A"])
>>>
that took ages
您可以将
expensive_to_compute
设为生成器函数:
settings = {
'expensive1' : expensive_to_compute(1),
'expensive2' : expensive_to_compute(2),
}
然后尝试:
from mysettings import settings
# next() advances the generator stored under the key, which is what runs
# the deferred computation.
print(next(settings['expensive1']))
我会用可调用项填充字典值,并在读取时将它们更改为结果。
class LazyDict(dict):
    """dict that replaces callable values with their result on first read.

    A stored callable is invoked with no arguments the first time its key
    is read through ``[]`` or ``get``; the result overwrites the callable,
    so later reads return the cached value.  Other inherited methods
    (``values``, ``items``, ``pop``, ...) are not overridden and return
    whatever is currently stored.
    """

    def __getitem__(self, k):
        """Return the value for *k*, evaluating and caching it if callable.

        Raises:
            KeyError: if *k* is not present.
        """
        v = super().__getitem__(k)
        if callable(v):
            v = v()
            super().__setitem__(k, v)
        return v

    def get(self, k, default=None):
        """Return the (lazily evaluated) value for *k*, or *default*."""
        # A membership test is used instead of catching KeyError so that a
        # KeyError raised inside the lazy callable is not mistaken for a
        # missing key and silently turned into *default*.
        if k in self:
            return self[k]
        return default
然后,配合下面这个函数:
def expensive_to_compute(arg):
    """Stand-in for a costly computation.

    Prints a marker so it is visible when the work actually happens, then
    returns *arg* multiplied by 3.
    """
    print('Doing heavy stuff')
    return arg * 3
你可以这样做:
>>> settings = LazyDict({
'expensive1': lambda: expensive_to_compute(1),
'expensive2': lambda: expensive_to_compute(2),
})
>>> settings.__repr__()
"{'expensive1': <function <lambda> at 0x000001A0BA2B8EA0>, 'expensive2': <function <lambda> at 0x000001A0BA2B8F28>}"
>>> settings['expensive1']
Doing heavy stuff
3
>>> settings.get('expensive2')
Doing heavy stuff
6
>>> settings.__repr__()
"{'expensive1': 3, 'expensive2': 6}"
我最近需要类似的东西。混合使用Guangyang Li和michaelmeyer的两种策略,我是这样做的:
from collections.abc import MutableMapping


class LazyDict(MutableMapping):
    """Lazily evaluated dictionary.

    Stored values are raw arguments.  The first time a key is read,
    ``function`` is applied to the stored argument and the result replaces
    it, so later reads return the cached result.  ``function`` may be
    assigned after construction, as long as it is set before the first
    lookup.
    """

    # Callable applied to a stored argument on first read.  Must be set
    # (typically on the instance) before any key is accessed.
    function = None

    def __init__(self, *args, **kargs):
        """Accept the same arguments as ``dict()``."""
        self._dict = dict(*args, **kargs)
        # Keys whose stored value is already the result of ``function``.
        # Tracking keys avoids having to recognise results by their type.
        self._evaluated = set()

    def __getitem__(self, key):
        """Evaluate value.

        Raises:
            KeyError: if *key* is not present.
        """
        value = self._dict[key]
        if key not in self._evaluated:
            value = self.function(value)
            self._dict[key] = value
            self._evaluated.add(key)
        return value

    def __setitem__(self, key, value):
        """Store value lazily."""
        self._dict[key] = value
        # A newly stored value is a raw argument again, even if the key
        # had been evaluated before.
        self._evaluated.discard(key)

    def __delitem__(self, key):
        """Delete value.

        Raises:
            KeyError: if *key* is not present.
        """
        del self._dict[key]
        self._evaluated.discard(key)

    def __iter__(self):
        """Iterate over dictionary."""
        return iter(self._dict)

    def __len__(self):
        """Evaluate size of dictionary."""
        return len(self._dict)
让我们惰性地评估以下函数:
def expensive_to_compute(arg):
    """Stand-in for a costly computation: return *arg* multiplied by 3."""
    return arg * 3
优点是函数尚未在对象内定义,参数是实际存储的参数(这正是我需要的):
>>> settings = LazyDict({'expensive1': 1, 'expensive2': 2})
>>> settings.function = expensive_to_compute # function unknown until now!
>>> settings['expensive1']
3
>>> settings['expensive2']
6
此方法仅适用于单个函数。
我可以指出以下优点:它实现了完整的
MutableMapping
API。
另一种做法:传入一个函数,在第一次访问属性时由它生成字典的值:
class LazyDict(dict):
    """Fill in the values of a dict at first access.

    The dict starts empty.  The first subscript read or ordinary attribute
    access calls ``fn(self, *args, **kwargs)`` exactly once, and ``fn`` is
    expected to populate the dict.  Operations that skip instance attribute
    lookup (``repr()``, ``len()``, ``in``, iteration) do not trigger the
    load.
    """

    def __init__(self, fn, *args, **kwargs):
        """Remember the loader *fn* and the arguments to call it with."""
        self._fn = fn
        self._fn_args = args or []
        self._fn_kwargs = kwargs or {}
        super(LazyDict, self).__init__()

    def _fn_populate(self):
        """Run the loader once; do nothing if it has already run."""
        fn = self._fn
        if not fn:
            return
        args, kwargs = self._fn_args, self._fn_kwargs
        # Clear the loader BEFORE calling it.  If the loader calls an
        # ordinary method such as self.update(), that goes through
        # __getattribute__ and back into this method; with the loader still
        # set it would be invoked again and recurse without end.
        self._fn = self._fn_args = self._fn_kwargs = None
        try:
            fn(self, *args, **kwargs)
        except BaseException:
            # Keep the loader so a later access can retry after a failure.
            self._fn, self._fn_args, self._fn_kwargs = fn, args, kwargs
            raise

    def __getattribute__(self, name):
        """Populate the dict before any attribute lookup except ``_fn*``."""
        # Names starting with '_fn' are the loader bookkeeping itself and
        # must be reachable without triggering the load.
        if not name.startswith('_fn'):
            self._fn_populate()
        return super(LazyDict, self).__getattribute__(name)

    def __getitem__(self, item):
        """Populate the dict if needed, then return the value for *item*.

        Raises:
            KeyError: if *item* is not present after loading.
        """
        self._fn_populate()
        return super(LazyDict, self).__getitem__(item)
>>> def _fn(self, val):
... print 'lazy loading'
... self['foo'] = val
...
>>> d = LazyDict(_fn, 'bar')
>>> d
{}
>>> d['foo']
lazy loading
'bar'
>>>
或者,可以使用 LazyDictionary 包来创建线程安全的惰性字典。
安装:
pip install lazydict
用途:
from lazydict import LazyDictionary
import tempfile
lazy = LazyDictionary()
# A zero-argument callable is stored here instead of calling mkdtemp() now;
# presumably LazyDictionary invokes it on first access - confirm against the
# lazydict package documentation.
lazy['temp'] = lambda: tempfile.mkdtemp()
补充一种解决方案:利用 dict 的
__missing__
方法。如果 dict 的子类定义了该方法,那么在访问不存在的键时就会调用它;这实际上就是一种按需加载并缓存的模式。
下面是一个类型感知的实现——这不是必需的,但有它就好了。在OP的提交中,人们可以使用他们选择的简单调度函数轻松地进行初始化。这提供了最大的灵活性,而且开销很小。
from typing import Dict, TypeVar, Callable
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")


class LoadingDict(dict[_KT, _VT]):
    """dict that computes and caches a value the first time a key is missing.

    ``fn(key)`` is called from ``__missing__`` when ``d[key]`` finds no
    entry; the result is stored under *key* and returned.  Only subscript
    access triggers loading; ``get`` and ``in`` do not.

    When the instance is created through a parameterised alias such as
    ``LoadingDict[str, int](fn)``, missing keys are checked against the
    key type argument before loading.
    """

    def __init__(self, fn: Callable[[_KT], _VT], **kwargs):
        """Store the loader *fn*; ``**kwargs`` pre-populate the dict.

        Raises:
            TypeError: if *fn* is not callable.
        """
        if not callable(fn):
            raise TypeError(type(fn))
        super().__init__(**kwargs)
        self._fn = fn

    def __missing__(self, key: _KT) -> _VT:
        """Load, cache and return the value for a missing *key*.

        Raises:
            ValueError: if the instance was parameterised with a key class
                and *key* is not an instance of it.
        """
        # __orig_class__ exists only when the instance was created through
        # a subscripted alias; a plain LoadingDict(fn) has no such attribute.
        orig_class = getattr(self, "__orig_class__", None)
        if orig_class is not None:
            key_type = orig_class.__args__[0]
            # isinstance() needs a real class; skip the check for TypeVars
            # or nested generic aliases used as the key parameter.
            if isinstance(key_type, type) and not isinstance(key, key_type):
                raise ValueError(type(key))
        self[key] = v = self._fn(key)
        return v
#### Usage
d = LoadingDict[str, int](len, **{"1": -1})
assert d["1"] == -1
assert d["xxx"] == 3
assert d["xxxx"] == 4