我试图在下面的示例数据中找出按顺序排列的字符串,但得不到任何输出。有人可以帮忙吗?
数据=["ABC","ABD","ABE","AB1","AB2","AB3","ab8","ab9","ab10","a123b","a124b","a125b","123-43","123-44","123-45","retgoven","ahilgdb"]
所需输出=["ABC","ABD","ABE","AB1","AB2","AB3","ab8","ab9","ab10","a123b","a124b","a125b","123-43","123-44","123-45"]
我使用了下面的代码,但输出没有顺序字符串。
import re
def split_string(s: str) -> list:
    """Split a string into alternating runs of digits and non-digits.

    Every character of *s* ends up in exactly one segment and the segments
    keep their original order, e.g. ``"a123b" -> ["a", "123", "b"]`` and
    ``"123-43" -> ["123", "-", "43"]``.  An empty string gives ``[]``.
    """
    # ``\d+`` grabs a maximal run of digits, ``\D+`` a maximal run of
    # anything else, so the two alternatives tile the whole string.
    return re.findall(r'\d+|\D+', s)
def is_sequential_string(prev_string: str, curr_string: str) -> bool:
    """Return True if *curr_string* is the direct successor of *prev_string*.

    Both strings are split into digit / non-digit segments with
    ``split_string``.  ``curr_string`` is the successor when both strings
    have the same number of segments and differ in exactly one of them,
    and that one segment is either

    * a run of digits whose integer value grew by exactly one
      (``"ab9" -> "ab10"``, ``"123-43" -> "123-44"``), or
    * a run of non-digits of equal length whose last character is a letter
      that advanced by one code point while everything before it is
      unchanged (``"ABC" -> "ABD"``).

    Identical strings (including two empty strings) and pairs that differ
    in more than one segment are not sequential.
    """
    prev_parts = split_string(prev_string)
    curr_parts = split_string(curr_string)
    if len(prev_parts) != len(curr_parts):
        return False

    stepped = False  # becomes True once the single allowed change is seen
    for prev, curr in zip(prev_parts, curr_parts):
        if prev == curr:
            # Unchanged segments (prefixes, separators such as "-") are fine.
            continue
        if stepped:
            # A second differing segment means this is not a simple "+1".
            return False
        # isdecimal() rather than isdigit(): it only accepts characters that
        # int() can parse, so a segment like "²" cannot raise ValueError.
        if prev.isdecimal() and curr.isdecimal():
            advanced = int(curr) == int(prev) + 1
        else:
            advanced = (
                len(prev) == len(curr)
                and prev[:-1] == curr[:-1]
                and prev[-1].isalpha()
                and curr[-1].isalpha()
                and ord(curr[-1]) == ord(prev[-1]) + 1
            )
        if not advanced:
            return False
        stepped = True
    return stepped
def find_sequential_strings(data) -> list:
    """Collect every string that belongs to a run of consecutive successors.

    Non-string items in *data* are dropped first, so adjacency is judged on
    the remaining strings only.  A run is a maximal stretch in which each
    string is the successor of the one before it according to
    ``is_sequential_string``; runs of length one are discarded.

    Returns a flat list with the members of all runs, in input order.
    """
    string_data = [item for item in data if isinstance(item, str)]
    sequential_strings = []
    last = len(string_data) - 1
    i = 0
    while i < last:
        run = [string_data[i]]
        # Extend the run for as long as the next string continues it.
        while i < last and is_sequential_string(string_data[i], string_data[i + 1]):
            run.append(string_data[i + 1])
            i += 1
        if len(run) > 1:
            sequential_strings.extend(run)
        # Step past the last member of the run (or past the lone string).
        i += 1
    return sequential_strings
# Sample data; the integers are skipped because find_sequential_strings()
# keeps only str items.
data = [
    112300150226, 112300150226, 112300150227, 112300150228,
    1123071, 1123071, 1123072, 1123073,
    "ABC", "ABD", "ABE", "AB1", "AB2", "AB3",
    "ab8", "ab9", "ab10", "a123b", "a124b", "a125b",
    "123-43", "123-44", "123-45", 10017, 10027, 10037, 10047,
]
# Find sequential strings
sequential_strings = find_sequential_strings(data)
# Display the results, one string per line
print("Sequential Strings:")
if sequential_strings:
    for seq_string in sequential_strings:
        print(seq_string)
else:
    print("No sequential strings found.")
我仍然不清楚正确的输出是什么。
不过,我们可以定义几个方法(时间复杂度均为 O(N)),把每个字符串拆分成数字、小写字母和大写字母三部分,再分别检查每一部分的字符是否按顺序排列。
如果您愿意,或者它符合您对“序列”的定义,您也可以利用 `ord()`。
import string
def _sanatize(s):
low, up, dig = '', '', ''
for char in s:
if char in string.ascii_uppercase:
up += char
if char in string.ascii_lowercase:
low += char
if char in string.digits:
dig += char
return low, up, dig
def _check(s):
i, j = 0, 0
invalid = False
while i + j < len(s) - 1:
i += 1
if s[i + j - 1] > s[i + j]:
invalid = True
break
return True if not invalid else False
def _check_parts(s: str) -> bool:
    """Return True if each character class of *s* is in non-decreasing order.

    *s* is split by ``_sanatize`` into lowercase letters, uppercase letters
    and digits; the string passes only when ``_check`` accepts all three
    parts.  The classes are checked independently of one another.
    """
    return all(_check(part) for part in _sanatize(s))
def get_seq(data) -> list:
    """Return the items of *data* that pass ``_check_parts``, in input order.

    Each string is judged on its own; neighbouring items are not compared
    with each other.
    """
    return [s for s in data if _check_parts(s)]
# Sample input: the strings from the question plus "AB21".
data = [
    "ABC", "ABD", "ABE",
    "AB1", "AB2", "AB3",
    "ab8", "ab9", "ab10",
    "a123b", "a124b", "a125b",
    "123-43", "123-44", "123-45",
    "retgoven", "ahilgdb", "AB21",
]
print(get_seq(data))
['ABC', 'ABD', 'ABE', 'AB1', 'AB2', 'AB3', 'ab8', 'ab9', 'a123b', 'a124b', 'a125b', '123-44', '123-45']