我希望我的字典不区分大小写。
我有这个示例代码:
text = "practice changing the color"
words = {
    'color': 'colour',
    'practice': 'practise',
}


def replace(words, text):
    """Return *text* with every key of *words* replaced by its value.

    Matching is a plain, case-sensitive substring replacement
    (``str.replace``), applied once per dictionary entry in the
    dictionary's iteration order.

    Args:
        words: Mapping of substring to replacement string.
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    for old, new in words.items():
        text = text.replace(old, new)
    return text


text = replace(words, text)
# Parenthesised so the statement is valid on Python 3 as well as Python 2.
print(text)
输出 = practise changing the colour
我想要另一个字符串
"practice changing the Color"
(其中 Color
以大写字母开头)也能给出相同的输出。
我相信有一种通用的方法可以使用
mydictionary[key.lower()]
但我不确定如何最好地将其集成到我现有的代码中。 (无论如何,如果这是一个合理、简单的方法的话)。
仅供记录。我在 Requests 上发现了一个很棒的实现:
https://github.com/kennethreitz/requests/blob/v1.2.3/requests/structurals.py#L37
当前接受的答案不适用于很多的情况,因此它不能用作直接的
dict
替代品。获得正确的 dict
替换品的一些棘手问题:
以下应该效果更好:
class CaseInsensitiveDict(dict):
    """A ``dict`` whose string keys are matched case-insensitively.

    String keys are lower-cased when stored and on every lookup, so the
    original spelling of a key is not preserved. Non-string keys are used
    unchanged.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` on Python 3.
    try:
        _string_types = (basestring,)  # noqa: F821
    except NameError:
        _string_types = (str,)

    @classmethod
    def _k(cls, key):
        """Return the normalised form of *key* used for storage and lookup."""
        return key.lower() if isinstance(key, cls._string_types) else key

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``dict`` and normalise the keys."""
        super(CaseInsensitiveDict, self).__init__(*args, **kwargs)
        self._convert_keys()

    def __getitem__(self, key):
        return super(CaseInsensitiveDict, self).__getitem__(self.__class__._k(key))

    def __setitem__(self, key, value):
        super(CaseInsensitiveDict, self).__setitem__(self.__class__._k(key), value)

    def __delitem__(self, key):
        return super(CaseInsensitiveDict, self).__delitem__(self.__class__._k(key))

    def __contains__(self, key):
        return super(CaseInsensitiveDict, self).__contains__(self.__class__._k(key))

    def has_key(self, key):
        """Python 2 style membership test; equivalent to ``key in self``."""
        # dict.has_key was removed in Python 3, so route through __contains__.
        return key in self

    def pop(self, key, *args, **kwargs):
        return super(CaseInsensitiveDict, self).pop(self.__class__._k(key), *args, **kwargs)

    def get(self, key, *args, **kwargs):
        return super(CaseInsensitiveDict, self).get(self.__class__._k(key), *args, **kwargs)

    def setdefault(self, key, *args, **kwargs):
        return super(CaseInsensitiveDict, self).setdefault(self.__class__._k(key), *args, **kwargs)

    def update(self, E=(), **F):
        """Merge *E* (mapping or iterable of pairs) and *F* into this dict.

        Both sources are first wrapped in a new instance of this class so
        their keys are normalised before being merged.
        """
        super(CaseInsensitiveDict, self).update(self.__class__(E))
        super(CaseInsensitiveDict, self).update(self.__class__(**F))

    def _convert_keys(self):
        """Re-insert every stored item so its key passes through ``_k``."""
        # Snapshot the keys first: the dict is mutated while we walk them.
        for k in list(self.keys()):
            v = super(CaseInsensitiveDict, self).pop(k)
            self.__setitem__(k, v)
如果我理解正确,并且您想要一种以不区分大小写的方式对字典进行键控的方法,一种方法是对 dict 进行子类化并重载 setter / getter:
class CaseInsensitiveDict(dict):
    """Minimal ``dict`` subclass that lower-cases keys on item get and set.

    Only ``d[key]`` and ``d[key] = value`` are overridden; every other
    ``dict`` method keeps its normal behaviour.
    """

    def __getitem__(self, key):
        """Return the value stored under the lower-cased *key*."""
        lowered = key.lower()
        return super(CaseInsensitiveDict, self).__getitem__(lowered)

    def __setitem__(self, key, value):
        """Store *value* under the lower-cased *key*."""
        lowered = key.lower()
        super(CaseInsensitiveDict, self).__setitem__(lowered, value)
在我的特定实例中,我需要不区分大小写的查找,但是,我不想修改键的原始大小写。例如:
>>> d = {}
>>> d['MyConfig'] = 'value'
>>> d['myconfig'] = 'new_value'
>>> d
{'MyConfig': 'new_value'}
您可以看到字典仍然保留原始的键,但是可以不区分大小写地访问它。这是一个简单的解决方案:
class CaseInsensitiveKey(object):
    """Wrap a string so it hashes and compares without regard to case.

    The original spelling is kept in ``key`` and returned by ``str()``.
    """

    def __init__(self, key):
        self.key = key

    def __hash__(self):
        # Hash the lower-cased form so keys that compare equal share a hash.
        return hash(self.key.lower())

    def __eq__(self, other):
        # Defer to the other operand instead of raising AttributeError when
        # compared with something that is not a CaseInsensitiveKey.
        if not isinstance(other, CaseInsensitiveKey):
            return NotImplemented
        return self.key.lower() == other.key.lower()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        outcome = self.__eq__(other)
        return outcome if outcome is NotImplemented else not outcome

    def __str__(self):
        return self.key
获取和设置字典中的条目都需要覆盖 __hash__ 和 __eq__ 。如果不区分大小写相等,这将创建散列到字典中相同位置的键。
现在创建一个自定义字典,使用提供的键初始化 CaseInsensitiveKey:
class CaseInsensitiveDict(dict):
    """``dict`` subclass that wraps keys in ``CaseInsensitiveKey``.

    Only item assignment and item lookup are overridden; both wrap the
    supplied key before delegating to ``dict``.
    """

    def __getitem__(self, key):
        """Return the value stored under the wrapped form of *key*."""
        wrapped = CaseInsensitiveKey(key)
        return super(CaseInsensitiveDict, self).__getitem__(wrapped)

    def __setitem__(self, key, value):
        """Store *value* under the wrapped form of *key*."""
        wrapped = CaseInsensitiveKey(key)
        super(CaseInsensitiveDict, self).__setitem__(wrapped, value)
或者只需确保在使用字典时始终传递 CaseInsensitiveKey 的实例作为键。
您会考虑在输入中使用
string.lower()
并使用完全小写的字典吗?这是一个有点hacky的解决方案,但它有效
我已经通过 pleasemorebacon 修改了简单但很好的解决方案(谢谢!),使其更加紧凑、独立,并进行了一些小的更新,以允许从
{'a':1, 'B':2}
进行构建并支持 __contains__
协议。
最后,由于 CaseInsensitiveDict.Key
预计是字符串(还有什么可以区分大小写或不区分大小写),因此最好让 Key
派生自 str
类,这样就可以开箱即用地将 CaseInsensitiveDict
与 json.dumps
一起使用。
# caseinsensitivedict.py
class CaseInsensitiveDict(dict):
    """A ``dict`` with case-insensitive keys that keeps their original case.

    Keys are stored as ``CaseInsensitiveDict.Key`` instances: ``str``
    subclasses that hash and compare on their lower-cased form. Only
    construction from a mapping, ``in``, item assignment and item lookup
    are case-insensitive.
    """

    class Key(str):
        """A ``str`` that hashes and compares without regard to case."""

        def __hash__(self):
            return hash(self.lower())

        def __eq__(self, other):
            # Defer to the other operand rather than raising AttributeError
            # when compared with a non-string.
            if not isinstance(other, str):
                return NotImplemented
            return self.lower() == other.lower()

        def __ne__(self, other):
            # Without this, != falls through to str.__ne__, which is
            # case-sensitive, so two keys could be both == and !=.
            outcome = self.__eq__(other)
            return outcome if outcome is NotImplemented else not outcome

    def __init__(self, data=None):
        """Create the dict, optionally copying the items of mapping *data*."""
        super(CaseInsensitiveDict, self).__init__()
        if data is None:
            data = {}
        # Go through __setitem__ so every key is wrapped in Key.
        for key, val in data.items():
            self[key] = val

    def __contains__(self, key):
        key = self.Key(key)
        return super(CaseInsensitiveDict, self).__contains__(key)

    def __setitem__(self, key, value):
        key = self.Key(key)
        super(CaseInsensitiveDict, self).__setitem__(key, value)

    def __getitem__(self, key):
        key = self.Key(key)
        return super(CaseInsensitiveDict, self).__getitem__(key)
对于那些喜欢检查实际情况的人来说,这是一个基本的测试脚本:
# test_CaseInsensitiveDict.py
import json
import unittest
from caseinsensitivedict import *
class Key(unittest.TestCase):
    """Unit tests for ``CaseInsensitiveDict.Key``."""

    def setUp(self):
        # Build one lower-case and one upper-case key from the same letter.
        key_type = CaseInsensitiveDict.Key
        self.Key = key_type
        self.lower, self.upper = key_type('a'), key_type('A')

    def test_eq(self):
        """Keys that differ only in case compare equal."""
        self.assertEqual(self.lower, self.upper)

    def test_hash(self):
        """Keys that differ only in case have the same hash."""
        lower_hash, upper_hash = hash(self.lower), hash(self.upper)
        self.assertEqual(lower_hash, upper_hash)

    def test_str(self):
        """``str()`` returns each key in its original case."""
        self.assertEqual(str(self.lower), 'a')
        self.assertEqual(str(self.upper), 'A')
class Dict(unittest.TestCase):
    """Unit tests for ``CaseInsensitiveDict`` itself."""

    def setUp(self):
        # d1 and d2 hold the same items, with the key case swapped.
        self.Dict = CaseInsensitiveDict
        self.d1, self.d2 = self.Dict(), self.Dict()
        for key, value in (('a', 1), ('B', 2)):
            self.d1[key] = value
        for key, value in (('A', 1), ('b', 2)):
            self.d2[key] = value

    def test_contains(self):
        """``in`` works for assigned keys and for keys given at construction."""
        self.assertIn('B', self.d1)
        built = self.Dict({'a':1, 'B':2})
        self.assertIn('b', built)

    def test_init(self):
        """An empty instance is falsy; one built from a mapping is truthy."""
        empty = self.Dict()
        self.assertFalse(empty)
        filled = self.Dict({'a':1, 'B':2})
        self.assertTrue(filled)

    def test_items(self):
        """Both dicts compare equal as dicts and by their item lists."""
        self.assertDictEqual(self.d1, self.d2)
        first_items = list(self.d1.items())
        second_items = list(self.d2.items())
        self.assertEqual(first_items, second_items)

    def test_json_dumps(self):
        """``json.dumps`` output contains the keys in their original case."""
        dumped = json.dumps(self.d1)
        for expected in ('a', 'B'):
            self.assertIn(expected, dumped)

    def test_keys(self):
        """The key views of both dicts compare equal."""
        self.assertEqual(self.d1.keys(), self.d2.keys())

    def test_values(self):
        """Both dicts hold the same values in the same order."""
        first_values = list(self.d1.values())
        second_values = list(self.d2.values())
        self.assertEqual(first_values, second_values)
您可以使用单行命令进行不区分大小写的字典搜索:
>>> input_dict = {'aBc':1, 'xyZ':2}
>>> search_string = 'ABC'
>>> next((value for key, value in input_dict.items() if key.lower()==search_string.lower()), None)
1
>>> search_string = 'EFG'
>>> next((value for key, value in input_dict.items() if key.lower()==search_string.lower()), None)
>>>
您可以将其放入函数中:
def get_case_insensitive_key_value(input_dict, key):
    """Return the first value whose key equals *key* ignoring case.

    Entries are scanned in the dictionary's iteration order. ``None`` is
    returned when no key matches.
    """
    for dict_key, value in input_dict.items():
        if dict_key.lower() == key.lower():
            return value
    return None
请注意,仅返回第一个匹配项。
虽然不区分大小写的字典是一种解决方案,并且有如何实现该解决方案的答案,但在这种情况下可能有一种更简单的方法。不区分大小写的搜索就足够了:
import re
text = "Practice changing the Color"
words = {'color': 'colour', 'practice': 'practise'}


def replace(words, text):
    """Return *text* with every key of *words* replaced, ignoring case.

    Each key is matched as a literal substring, case-insensitively, and
    replaced by its value exactly as written. Entries are applied one
    after another in the dictionary's iteration order.

    Args:
        words: Mapping of substring to replacement string.
        text: The string to rewrite.

    Returns:
        The rewritten string.
    """
    for old, new in words.items():
        # Escape the key so characters such as "." or "(" match literally
        # instead of being interpreted as regular-expression syntax.
        pattern = re.compile(re.escape(old), re.I)
        # A callable replacement is inserted verbatim; a plain string would
        # have its backslash escapes and group references expanded.
        text = pattern.sub(lambda _match, new=new: new, text)
    return text


text = replace(words, text)
# Parenthesised so the statement is valid on Python 3 as well as Python 2.
print(text)
如果您只需要在代码中执行一次此操作(因此没有必要为此编写函数),则处理该问题的最直接方法是:
lowercase_dict = {key.lower(): value for (key, value) in original_dict.items()}
我在这里假设所讨论的字典并不是那么大——复制它可能不太雅观,但如果它不大,就不会造成任何伤害。
相对于@Fred的答案(尽管这也有效)的优点是,当键不存在时,它会产生与字典相同的结果:KeyError。
解决这个问题有多种方法,每种方法都有其优点和缺点。只是为了添加到列表中(看起来没有提到这个选项),可以扩展
str
类并将其用作键:
class CaseInsensitiveStr(str):
    """A ``str`` that hashes and compares without regard to case.

    The original spelling is preserved; only ``hash()``, ``==`` and ``!=``
    use the lower-cased form.
    """

    def __hash__(self) -> 'int':
        return hash(self.lower())

    def __eq__(self, other:'str') -> 'bool':
        # Defer to the other operand rather than raising AttributeError
        # when compared with a non-string.
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() == other.lower()

    def __ne__(self, other:'str') -> 'bool':
        # Without this, != falls through to str.__ne__, which is
        # case-sensitive, so two values could be both == and !=.
        outcome = self.__eq__(other)
        return outcome if outcome is NotImplemented else not outcome
如果相关字典是私有的并且使用某种接口来访问它,它可以很好地工作。
class MyThing:
    """Holds a private mapping keyed by ``CaseInsensitiveStr``."""

    def __init__(self):
        self._d: 'dict[CaseInsensitiveStr, int]' = {}

    def set(self, key:'str', value:'int'):
        """Store *value* under *key* wrapped as a ``CaseInsensitiveStr``."""
        wrapped = CaseInsensitiveStr(key)
        self._d[wrapped] = value

    def get(self, key:'str') -> 'int':
        """Return the value stored under *key*; raises ``KeyError`` if absent."""
        wrapped = CaseInsensitiveStr(key)
        return self._d[wrapped]
致谢:基于@m000的回答。以下变体提供了 get_orig_key 方法,它通过记录最后一次“设置”操作所用的区分大小写的键来实现。
class RobbieCaseInsensitiveDict(dict):
    """Case-insensitive ``dict`` that remembers each key's original spelling.

    String keys are stored lower-cased; non-string keys are used unchanged.
    ``key_dict`` maps every lower-cased string key to the spelling used by
    the most recent operation that set it, which ``get_orig_key`` returns.
    """

    @classmethod
    def _k(cls, key):
        """Return the normalised form of *key* used for storage and lookup."""
        return key.lower() if isinstance(key, str) else key

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``dict`` and normalise the keys."""
        super(RobbieCaseInsensitiveDict, self).__init__(*args, **kwargs)
        self.key_dict = {}
        # _convert_keys re-inserts every item through __setitem__, which
        # also records each original spelling in key_dict.
        self._convert_keys()

    def _remember(self, key):
        """Record the original spelling of string *key* in ``key_dict``."""
        if isinstance(key, str):
            self.key_dict[key.lower()] = key

    def get_orig_key(self, case_ins_key):
        """Return the recorded original spelling for *case_ins_key*.

        The argument is returned unchanged when no spelling is recorded.
        """
        return self.key_dict.get(self.__class__._k(case_ins_key), case_ins_key)

    def __getitem__(self, key):
        return super(RobbieCaseInsensitiveDict, self).__getitem__(self.__class__._k(key))

    def __setitem__(self, key, value):
        self._remember(key)
        super(RobbieCaseInsensitiveDict, self).__setitem__(self.__class__._k(key), value)

    def __delitem__(self, key):
        lowered = self.__class__._k(key)
        super(RobbieCaseInsensitiveDict, self).__delitem__(lowered)
        # Forget the spelling only after the delete succeeded.
        self.key_dict.pop(lowered, None)

    def __contains__(self, key):
        return super(RobbieCaseInsensitiveDict, self).__contains__(self.__class__._k(key))

    def has_key(self, key):
        """Python 2 style membership test; equivalent to ``key in self``."""
        # dict.has_key was removed in Python 3, so route through __contains__.
        return key in self

    def pop(self, key, *args, **kwargs):
        lowered = self.__class__._k(key)
        value = super(RobbieCaseInsensitiveDict, self).pop(lowered, *args, **kwargs)
        # Forget the spelling only after the pop succeeded.
        self.key_dict.pop(lowered, None)
        return value

    def get(self, key, *args, **kwargs):
        return super(RobbieCaseInsensitiveDict, self).get(self.__class__._k(key), *args, **kwargs)

    def setdefault(self, key, *args, **kwargs):
        # Record the spelling only when the key is actually inserted; an
        # existing entry keeps the spelling it was set with.
        if key not in self:
            self._remember(key)
        return super(RobbieCaseInsensitiveDict, self).setdefault(self.__class__._k(key), *args, **kwargs)

    def update(self, E=(), **F):
        """Merge *E* (mapping or iterable of pairs) and *F* into this dict.

        Each source is first wrapped in a new instance of this class so its
        keys are normalised; the spellings that instance recorded are then
        merged into ``key_dict`` as well.
        """
        for source in (self.__class__(E), self.__class__(**F)):
            super(RobbieCaseInsensitiveDict, self).update(source)
            self.key_dict.update(source.key_dict)

    def _convert_keys(self):
        """Re-insert every stored item so its key passes through ``_k``."""
        # Snapshot the keys first: the dict is mutated while we walk them.
        for k in list(self.keys()):
            v = super(RobbieCaseInsensitiveDict, self).pop(k)
            self.__setitem__(k, v)
这是一个与 Python 3 兼容的简单且正确的实现。希望也足够简单以供人类理解。其他答案仍然尝试与Python 2兼容。
from typing import Mapping
class CaseInsensitiveDict(dict):
    """A ``dict`` whose keys are lower-cased on every store and lookup.

    Every key must provide ``.lower()``; the original spelling of a key is
    not preserved.
    """

    def __init__(self, data=None, **kwargs):
        """Build the dict from a mapping or iterable of pairs plus keywords."""
        super().__init__()
        self.update(data, **kwargs)

    def get(self, k, default=None):
        return super().get(k.lower(), default)

    def pop(self, k, *default):
        """Remove *k* and return its value.

        As with ``dict.pop``, an optional fallback may be passed; without
        one a missing key raises ``KeyError``.
        """
        return super().pop(k.lower(), *default)

    def setdefault(self, k, default=None):
        return super().setdefault(k.lower(), default)

    def update(self, data=None, **kwargs):
        """Merge *data* and *kwargs* into this dict with lower-cased keys."""
        if data is not None:
            pairs = data.items() if isinstance(data, Mapping) else data
            super().update((k.lower(), v) for k, v in pairs)
        # Iterate the items: iterating the kwargs dict itself yields only
        # the key names, which cannot be unpacked into (key, value).
        super().update((k.lower(), v) for k, v in kwargs.items())

    def __contains__(self, k):
        return super().__contains__(k.lower())

    def __delitem__(self, k):
        super().__delitem__(k.lower())

    def __eq__(self, other):
        """Compare with another mapping, ignoring the case of its keys."""
        if not isinstance(other, Mapping):
            return False
        if not isinstance(other, CaseInsensitiveDict):
            other = CaseInsensitiveDict(other)
        return super().__eq__(other)

    def __ne__(self, other):
        # dict supplies its own case-sensitive __ne__, so it has to be
        # overridden to stay consistent with __eq__.
        return not self.__eq__(other)

    def __getitem__(self, k):
        return super().__getitem__(k.lower())

    def __setitem__(self, k, v):
        super().__setitem__(k.lower(), v)
我刚刚设置了一个函数来处理这个问题:
def setLCdict(d, k, v):
    """Store *v* in *d* under the lower-cased key *k* and return *d*.

    The dictionary is modified in place; the same object is returned.
    """
    lowered = k.lower()
    d[lowered] = v
    return d
myDict = {}
所以代替
myDict['A'] = 1
myDict['B'] = 2
您可以:
myDict = setLCdict(myDict, 'A', 1)
myDict = setLCdict(myDict, 'B', 2)
然后,您可以在查找之前将值小写,或者编写一个函数来执行此操作。
def lookupLCdict(d, k):
    """Return the value stored in *d* under the lower-cased form of *k*.

    Raises ``KeyError`` when the lower-cased key is not present.
    """
    return d[k.lower()]
myVal = lookupLCdict(myDict, 'a')
如果您想在全局范围内执行此操作,这可能不太理想,但如果您只是希望在一部分字典上使用它,则效果很好。