我想知道是否有任何方法可以轻松地在空格处分割字符串,除非空格位于引号内?
例如,把
Foo bar random "letters lol" stuff
拆分成
Foo、bar、random、"letters lol"、stuff
换个角度想:您的字符串其实符合 RFC 4180 的逗号分隔值(CSV)格式,只不过引号之外的分隔符是空格而不是逗号。例如:
package main
import (
"encoding/csv"
"fmt"
"strings"
)
// main prints the sample string, then parses it as a single CSV record whose
// field separator is a space, and prints each resulting field quoted on its
// own line. A parse error is printed and ends the program early.
func main() {
	const input = `Foo bar random "letters lol" stuff`
	fmt.Printf("String:\n%q\n", input)

	// Treat the line as CSV with ' ' as the delimiter so that the quoted
	// section is read as one field.
	reader := csv.NewReader(strings.NewReader(input))
	reader.Comma = ' '

	record, err := reader.Read()
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Printf("\nFields:\n")
	for i := range record {
		fmt.Printf("%q\n", record[i])
	}
}
Go Playground:https://play.golang.org/p/Ed4IV97L7H
输出:
String:
"Foo bar random \"letters lol\" stuff"
Fields:
"Foo"
"bar"
"random"
"letters lol"
"stuff"
使用 strings.FieldsFunc,尝试这个:
package main
import (
"fmt"
"strings"
)
// main splits the sample string on spaces that lie outside double quotes,
// using strings.FieldsFunc with a predicate that tracks quote state, and
// prints the fields joined by ", ". The quote characters stay in the fields.
func main() {
	const input = `Foo bar random "letters lol" stuff`

	inQuotes := false
	// NOTE(review): this predicate is stateful and therefore depends on
	// FieldsFunc calling it once per rune, left to right; the strings
	// package documentation does not promise that ordering.
	isSeparator := func(c rune) bool {
		switch c {
		case '"':
			inQuotes = !inQuotes
			return false
		case ' ':
			return !inQuotes
		default:
			return false
		}
	}

	fields := strings.FieldsFunc(input, isSeparator)
	fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, "letters lol", stuff
}
使用 strings.Builder 和 range,并根据您的意愿保留或去掉 ",尝试这个:
package main
import (
"fmt"
"strings"
)
// splitFields splits s on spaces that are outside double-quoted sections.
//
// When keepQuotes is true the '"' characters are kept in the returned
// fields; when false they are dropped. Runs of spaces (including leading and
// trailing ones) never produce empty fields, but an explicitly quoted empty
// string ("") still yields a field. It returns nil when s has no fields.
func splitFields(s string, keepQuotes bool) []string {
	var (
		fields  []string
		sb      strings.Builder
		quoted  bool // currently between an opening and a closing '"'
		inField bool // at least one rune (or quote) seen since the last separator
	)

	// flush emits the pending field, if any, and starts a new one.
	flush := func() {
		if !inField {
			return
		}
		fields = append(fields, sb.String())
		sb.Reset()
		inField = false
	}

	for _, r := range s {
		switch {
		case r == '"':
			quoted = !quoted
			// A quote starts a field even if nothing is written, so that
			// `""` survives when keepQuotes is false.
			inField = true
			if keepQuotes {
				sb.WriteRune(r)
			}
		case r == ' ' && !quoted:
			flush()
		default:
			sb.WriteRune(r)
			inField = true
		}
	}
	flush()
	return fields
}

// main splits the sample string with splitFields, keeping the quote
// characters, and prints the fields joined by ", ".
func main() {
	s := `Foo bar random "letters lol" stuff`
	out := strings.Join(splitFields(s, true), ", ")
	fmt.Println(out) // Foo, bar, random, "letters lol", stuff
	// With keepQuotes == false: Foo, bar, random, letters lol, stuff
}
使用 scanner.Scanner,尝试这个:
package main
import (
"fmt"
"strings"
"text/scanner"
)
// main tokenizes the sample string with text/scanner, collecting the text of
// every token until EOF, and prints the tokens joined by ", ". The quoted
// section comes back as one token with its quotes included.
func main() {
	var sc scanner.Scanner
	sc.Init(strings.NewReader(`Foo bar random "letters lol" stuff`))

	tokens := make([]string, 0, 5)
	for tok := sc.Scan(); tok != scanner.EOF; tok = sc.Scan() {
		tokens = append(tokens, sc.TokenText())
	}

	fmt.Println(strings.Join(tokens, ", ")) // Foo, bar, random, "letters lol", stuff
}
使用 csv.NewReader(它会删除 " 本身),尝试这个:
package main
import (
"encoding/csv"
"fmt"
"log"
"strings"
)
// main reads the sample string as one CSV record delimited by spaces and
// prints the fields joined by ", ". The CSV reader strips the surrounding
// quotes from the quoted field. A read error terminates the program via
// log.Fatal.
func main() {
	const input = `Foo bar random "letters lol" stuff`

	reader := csv.NewReader(strings.NewReader(input))
	reader.Comma = ' '

	fields, err := reader.Read()
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(strings.Join(fields, ", ")) // Foo, bar, random, letters lol, stuff
}
使用 regexp,尝试这个:
package main
import (
"fmt"
"regexp"
"strings"
)
// main extracts fields from the sample string with a regular expression that
// matches either a run of non-space, non-quote characters or a complete
// double-quoted section, and prints the matches joined by ", ".
func main() {
	const input = `Foo bar random "letters lol" stuff`

	pattern := regexp.MustCompile(`[^\s"]+|"([^"]*)"`)
	matches := pattern.FindAllString(input, -1)

	fmt.Println(strings.Join(matches, ", ")) // Foo, bar, random, "letters lol", stuff
}
你可以使用正则表达式
这个(go Playground)将涵盖引号内多个单词和数组中多个引用条目的所有用例:
package main
import (
"fmt"
"regexp"
)
// quotedFieldRE matches one field: a run of characters that are neither
// whitespace nor quotes, a complete "double-quoted" section, or a complete
// 'single-quoted' section. Compiled once at package scope.
var quotedFieldRE = regexp.MustCompile(`[^\s"']+|"([^"]*)"|'([^']*)'`)

// splitQuotedRegexp returns the fields of s in order of appearance. Quoted
// sections are returned as single fields with their quote characters
// included. It returns nil when s contains no field.
func splitQuotedRegexp(s string) []string {
	return quotedFieldRE.FindAllString(s, -1)
}

// main splits a sample string containing several quoted sections and prints
// the resulting slice.
func main() {
	s := `Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"`
	arr := splitQuotedRegexp(s)
	fmt.Println("your array: ", arr)
}
输出将是:
your array:  [Foo bar random "letters lol" stuff "also will" work on "multiple quoted stuff"]
如果您想了解有关正则表达式的更多信息,这里有一个很好的答案,最后有超级方便的资源 - 学习正则表达式
希望这有帮助
稍微概括一下问题,此解决方案允许使用几种不同的引号字符('、" 等)和字段分隔符(空格、, 等)。
package main
import (
"fmt"
"strings"
)
// Split breaks s into fields separated by any rune in delim, except that
// delimiter runes inside a quoted section do not split.
//
// A quoted section starts at any rune in quote that appears outside a quoted
// section and ends at the next occurrence of that same rune; other quote
// runes inside it are ordinary characters. Quote runes are kept in the
// returned fields. An unterminated quote extends to the end of s. Runs of
// delimiters never produce empty fields.
//
// The returned slice is non-nil even when it is empty.
func Split(s string, quote []rune, delim []rune) []string {
	// The scan is an explicit left-to-right loop rather than a stateful
	// predicate passed to strings.FieldsFunc, because FieldsFunc makes no
	// guarantee about the order in which it calls its predicate.
	fields := []string{}
	var (
		open     rune // quote rune that opened the current quoted section
		inQuotes bool // tracked separately so that any rune can act as a quote
	)
	start := -1 // byte offset where the current field began, or -1 between fields

	for i, r := range s {
		switch {
		case inQuotes:
			if r == open {
				inQuotes = false
			}
		case containsRune(quote, r):
			open, inQuotes = r, true
		case containsRune(delim, r):
			// Unquoted delimiter: close the pending field, if any.
			if start >= 0 {
				fields = append(fields, s[start:i])
				start = -1
			}
			continue
		}
		if start < 0 {
			start = i
		}
	}
	if start >= 0 {
		fields = append(fields, s[start:])
	}
	return fields
}

// containsRune reports whether r is an element of set.
func containsRune(set []rune, r rune) bool {
	for _, c := range set {
		if c == r {
			return true
		}
	}
	return false
}
// ExampleSplit demonstrates Split with single and double quotes as quote
// characters and with space and comma as field delimiters, printing the
// resulting fields joined by "|".
func ExampleSplit() {
	const input = `Foo 'bar, "ran"dom' "le'tt'er's lol"stuff,xyz`
	quotes := []rune(`'"`)
	delims := []rune(" ,")
	fmt.Println(strings.Join(Split(input, quotes, delims), "|"))
	// Output: Foo|'bar, "ran"dom'|"le'tt'er's lol"stuff|xyz
}
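基于 strings.ContainsRune 和 strings.FieldsFunc 的写法有两个限制:每处理一个字符都要把 []rune 重新转换成 string;并且依赖 strings.FieldsFunc 按从左到右的顺序调用回调函数,而其文档并不保证这一点。用简单的循环替换 strings.ContainsRune 和 strings.FieldsFunc 调用,这两个限制都可以轻松解决。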