我正在用 Lua 编写一种子语言,做法是按空格分割字符串。我希望括号内的内容不被拆分,目前已经能够检测出是否存在括号。但我想对括号内的字符串"反转"这种匹配(即不在其中按空格拆分),因为我想原样保留括号里包含的字符串。
--- Split a string on whitespace while keeping bracketed groups intact.
--
-- Tokens are runs of non-whitespace characters. As soon as a token contains
-- an opening bracket -- "(", "[" or "{" -- every following token is kept in
-- the same result entry until the brackets are balanced again. Nested
-- brackets are supported by counting depth. The three bracket kinds are not
-- distinguished from one another: any closer balances any opener.
--
-- A group is returned as the exact substring of `strng`, so whitespace
-- inside the brackets is preserved unchanged. A group that is still open at
-- the end of the input is returned as the last entry rather than dropped.
-- This function prints nothing.
--
-- @tparam string strng the text to split
-- @treturn table sequence of words and bracketed groups, in input order
local function split(strng)
  local result = {}
  local depth = 0
  local group_start, group_end

  -- The empty captures "()" yield the position of the token's first
  -- character and the position just past its last character.
  for first, token, after in string.gmatch(strng, '()(%S+)()') do
    -- Lua patterns have no alternation, so a character set is used to match
    -- any bracket; gsub's second result is the number of matches.
    local _, opens = string.gsub(token, '[%(%[{]', '')
    local _, closes = string.gsub(token, '[%)%]}]', '')

    if depth == 0 and opens == 0 then
      -- Plain word outside any group (a stray closer is kept as-is).
      result[#result + 1] = token
    else
      if depth == 0 then
        group_start = first
      end
      group_end = after - 1
      depth = depth + opens - closes
      if depth <= 0 then
        -- Brackets are balanced: emit the whole group verbatim.
        result[#result + 1] = string.sub(strng, group_start, group_end)
        depth = 0
        group_start = nil
      end
    end
  end

  -- Unclosed group: keep what was collected instead of losing it.
  if group_start then
    result[#result + 1] = string.sub(strng, group_start, group_end)
  end

  return result
end
以上是我用来分割字符串的代码。
对于这个问题,我根本不会使用 gmatch。
-- Demo: split `input` on single spaces, treating everything from a "(" up
-- to its matching ")" (nested parentheses included) as part of one word.
-- Consecutive, leading and trailing spaces each produce an empty word; that
-- is deliberate and left as-is. Only round parentheses are recognised.
local input = " this is a string (containg some (well, many) annoying) parentheses and should be split. The string contains double spaces. What should be done? And what about trailing spaces? "

-- Byte values of the three significant characters, computed once.
local SPACE, OPEN, CLOSE = string.byte(" ()", 1, 3)

local pos = 1
local words = {}
local last_start = pos -- index where the current word begins

while pos <= #input do
  local char = string.byte(input, pos)
  if char == SPACE then
    -- A space outside parentheses ends the current word.
    words[#words + 1] = string.sub(input, last_start, pos - 1)
    last_start = pos + 1
  elseif char == OPEN then
    -- Skip ahead to the matching ")" so spaces inside are not split on.
    -- The scan may reach the very last character of the input.
    local depth = 1
    while depth ~= 0 and pos + 1 <= #input do
      pos = pos + 1
      local inner = string.byte(input, pos)
      if inner == CLOSE then
        depth = depth - 1
      elseif inner == OPEN then
        depth = depth + 1
      end
    end
  end
  pos = pos + 1
end

-- Whatever follows the last separator is the final word (possibly empty).
words[#words + 1] = string.sub(input, last_start)

-- ipairs guarantees the words are printed in index order.
for k, v in ipairs(words) do
  print(k, "'" .. v .. "'")
end
输出:
1 ''
2 'this'
3 'is'
4 'a'
5 'string'
6 '(containg some (well, many) annoying)'
7 'parentheses'
8 'and'
9 'should'
10 'be'
11 'split.'
12 'The'
13 'string'
14 'contains'
15 ''
16 'double'
17 ''
18 ''
19 'spaces.'
20 'What'
21 'should'
22 'be'
23 'done?'
24 'And'
25 'what'
26 'about'
27 'trailing'
28 'spaces?'
29 ''
关于尾随空格以及其他类似问题该如何处理,留给读者作为练习。我尝试用所选的示例突出一些可能出现的问题。另外,我只处理了一种括号,因为我不想去考虑 this (string} should be ]parsed 这样的输入究竟应该如何解析。
哦,如果不需要处理嵌套的括号:上面的大部分代码可以替换为一次 string.find(input, ")", pos, true) 调用,用它来查找右括号。
请注意,您不能像代码中那样在模式之间使用 or 或 and。
"%(" or "%[" 等于 "%("。
Lua 会从左到右对这个表达式求值。"%(" 是一个真值,因此 Lua 会把整个表达式化简为 "%(",这在逻辑上与完整表达式的结果相同。
所以 string.find(i,'%('or'%['or'%{') 只会在 i 中查找 ( 。如果要匹配任意一种左括号,可以改用字符集,例如 string.find(i,'[%(%[{]')。
作为一种与 Uli 的答案类似但略有不同的方法,我会先按括号进行分割,然后再对奇数编号的字段按空格拆分:
split = require("split") -- https://luarocks.org/modules/telemachus/split
--- Split a string into alternating plain and parenthesized fields.
--
-- The result always has an odd number of entries: odd-numbered entries hold
-- the text outside any parentheses (possibly the empty string), and
-- even-numbered entries hold one top-level parenthesized group each,
-- including its outer "(" and ")" and any nested groups.
--
-- Fields are cut out of `input` by index instead of being built one
-- character at a time, which avoids quadratic string concatenation.
--
-- @tparam string input the text to split
-- @treturn table sequence of fields in input order
-- @raise "Mismatched parentheses" if a ")" has no matching "(" or a "("
--        is never closed
split__by_parentheses = function(input)
  local OPEN, CLOSE = ("()"):byte(1, 2)
  local fields = {}
  local level = 0
  local field_start = 1 -- index of the first character of the current field

  for i = 1, #input do
    local byte = input:byte(i)
    if byte == OPEN then
      if level == 0 then
        -- add non-parenthesized field to list (everything before this "(")
        fields[#fields + 1] = input:sub(field_start, i - 1)
        field_start = i
      end
      level = level + 1
    elseif byte == CLOSE then
      level = level - 1
      assert(level >= 0, 'Mismatched parentheses')
      if level == 0 then
        -- add parenthesized field to list (up to and including this ")")
        fields[#fields + 1] = input:sub(field_start, i)
        field_start = i + 1
      end
    end
  end

  assert(level == 0, 'Mismatched parentheses')
  -- Trailing non-parenthesized text; empty when the input ends with ")".
  fields[#fields + 1] = input:sub(field_start)
  return fields
end
-- Demo driver: cut the sample text into plain / parenthesized fields, print
-- each field, and additionally print the words of every odd-numbered
-- (non-parenthesized) field as produced by split.split.
input = " this is a string (containg some (well, many) annoying) parentheses and should be split. The string contains double spaces. What should be done? And what about trailing spaces? "
fields = split__by_parentheses(input)

for index = 1, #fields do
  local segment = fields[index]
  print(string.format("%d\t'%s'", index, segment))

  -- Odd positions hold the text found outside of parentheses.
  local is_outside_parentheses = (index % 2 == 1)
  if is_outside_parentheses then
    local pieces = split.split(segment)
    for position, piece in ipairs(pieces) do
      print(string.format("\t%d\t%s", position, piece))
    end
  end
end
输出
1 ' this is a string '
1
2 this
3 is
4 a
5 string
6
2 '(containg some (well, many) annoying)'
3 ' parentheses and should be split. The string contains double spaces. What should be done? And what about trailing spaces? '
1
2 parentheses
3 and
4 should
5 be
6 split.
7 The
8 string
9 contains
10 double
11 spaces.
12 What
13 should
14 be
15 done?
16 And
17 what
18 about
19 trailing
20 spaces?
21