下面的字符串是请求json文件的结果。
StrResult ="\u00D8\u00B3\u00D9\u0084\u00D8\u00A7\u00D9\u0085
\u00D8\u00AF\u00D9\u0086\u00DB\u008C\u00D8\u00A7"
如何将这个字符串转换为可读字符? 有很多其他语言的示例代码,包括Python、.Net等,但我找不到VB6的任何代码。如果您知道的话,谢谢您。这对我来说至关重要。
提示:解码后应接收到的字符串是“มรรรร”,其对应的英文是“Hello World”。
您提供的字符串不会解码为“?????”,而是解码为“سÙا٠Ø̄ÙÛا”。您可以在此处确认。
您的字符串实际包含的是单独的 UTF-8 字节,而不是 Unicode 代码点。这使您的任务变得更加困难,因为 VB6 字符串通常在内存中以 UTF-16 编码。
我最近开发了一个库,其中包含用于 VBA 的广泛字符串功能,但我认为有关转义和取消转义 Unicode 文字的部分应该可以像 VB6 代码一样工作。您可以在 GitHub here 上找到整个库,但我可以在此处直接包含应该解决您问题的部分。
使用下面我提供的库中的函数,您应该能够实现您想要的结果,如下所示:
StrResult = DecodeUTF8(EncodeANSI(UnescapeUnicode(StrResult)))
这些是所需的功能:
Option Explicit
Public Enum UnicodeEscapeFormat
[_efNone] = 0
efPython = 1 '\uXXXX \u00XXXXXX (4 or 8 hex digits, 8 for chars outside BMP)
efRust = 2 '\u{X} \U{XXXXXX} (1 to 6 hex digits)
efUPlus = 4 'u+XXXX u+XXXXXX (4 or 6 hex digits)
efMarkup = 8 '&#ddddddd; (1 to 7 decimal digits)
efAll = 15
[_efMin] = efPython
[_efMax] = efAll
End Enum
Private Type EscapeSequence
ueFormat As UnicodeEscapeFormat
ueSignature As String
letSngSurrogate As Boolean
buffPosition As Long
currPosition As Long
sigSize As Long
escSize As Long
codepoint As Long
unEscSize As Long
End Type
Private Type TwoCharTemplate
s As String * 2
End Type
Private Type LongTemplate
l As Long
End Type
'Replaces all occurences of unicode characters outside the codePoint range
'defined by maxNonEscapedCharCode with literals of the following formats
'specified by `escapeFormat`:
' efPython = 1 ... \uXXXX \u00XXXXXX (4 or 8 hex digits, 8 for chars outside BMP)
' efRust = 2 ... \u{XXXX} \U{XXXXXX} (1 to 6 hex digits)
' efUPlus = 4 ... u+XXXX u+XXXXXX (4 or 6 hex digits)
' efMarkup = 8 ... &#ddddddd; (1 to 7 decimal digits)
'Where:
' - prefixes \u is case insensitive
' - Xes are the digits of the codepoint in hexadecimal. (X = 0-9 or A-F/a-f)
'Note:
' - Avoid u+XXXX syntax if string contains literals without delimiters as it
' can be misinterpreted if adjacent to text starting with 0-9 or a-f.
' - This function accepts all combinations of UnicodeEscapeFormats:
' If called with, e.g. `escapeFormat = efRust Or efPython`, every character
' in the scope will be escaped with in either format, efRust or efPython,
' chosen at random for each replacement.
' - If `escapeFormat` is set to efAll, it will replace every character in the
' scope with a randomly chosen format of all available fotrmats.
' - To escape every character, set `maxNonEscapedCharCode = -1`
Public Function EscapeUnicode(ByRef str As String, _
Optional ByVal maxNonEscapedCharCode As Long = &HFF, _
Optional ByVal escapeFormat As UnicodeEscapeFormat _
= efPython) As String
Const methodName As String = "EscapeUnicode"
If maxNonEscapedCharCode < -1 Then Err.Raise 5, methodName, _
"`maxNonEscapedCharCode` must be greater or equal -1."
If escapeFormat < [_efMin] Or escapeFormat > [_efMax] Then _
Err.Raise 5, methodName, "Invalid escape type."
If Len(str) = 0 Then Exit Function
Dim i As Long
Dim j As Long: j = 1
Dim result() As String: ReDim result(1 To Len(str))
Dim copyChunkSize As Long
Dim rndEscapeFormat As Boolean
rndEscapeFormat = ((escapeFormat And (escapeFormat - 1)) <> 0) 'eFmt <> 2^n
Dim numescapeFormats As Long
If rndEscapeFormat Then
Dim escapeFormats() As Long
For i = 0 To (Log(efAll + 1) / Log(2)) - 1
If 2 ^ i And escapeFormat Then
ReDim Preserve escapeFormats(0 To numescapeFormats)
escapeFormats(numescapeFormats) = 2 ^ i
numescapeFormats = numescapeFormats + 1
End If
Next i
End If
For i = 1 To Len(str)
Dim codepoint As Long: codepoint = AscU(Mid$(str, i, 2))
If codepoint > maxNonEscapedCharCode Then
If copyChunkSize > 0 Then
result(j) = Mid$(str, i - copyChunkSize, copyChunkSize)
copyChunkSize = 0
j = j + 1
End If
If rndEscapeFormat Then
escapeFormat = escapeFormats(Int(numescapeFormats * Rnd))
End If
Select Case escapeFormat
Case efPython
If codepoint > &HFFFF& Then 'Outside BMP
result(j) = "\u" & "00" & Right$("0" & Hex(codepoint), 6)
Else 'BMP
result(j) = "\u" & Right$("000" & Hex(codepoint), 4)
End If
Case efRust
result(j) = "\u{" & Hex(codepoint) & "}"
Case efUPlus
If codepoint < &H1000& Then
result(j) = "u+" & Right$("000" & Hex(codepoint), 4)
Else
result(j) = "u+" & Hex(codepoint)
End If
Case efMarkup
result(j) = "&#" & codepoint & ";"
End Select
If rndEscapeFormat Then
If Int(2 * Rnd) = 1 Then result(j) = UCase(result(j))
End If
j = j + 1
Else
If codepoint < &H10000 Then
copyChunkSize = copyChunkSize + 1
Else
copyChunkSize = copyChunkSize + 2
End If
End If
If codepoint > &HFFFF& Then i = i + 1
Next i
If copyChunkSize > 0 Then _
result(j) = Mid$(str, i - copyChunkSize, copyChunkSize)
EscapeUnicode = Join(result, "")
End Function
'Replaces all occurences of unicode literals
'Accepts the following formattings `escapeFormat`:
' efPython = 1 ... \uXXXX \u000XXXXX (4 or 8 hex digits, 8 for chars outside BMP)
' efRust = 2 ... \u{XXXX} \U{XXXXXXX} (1 to 6 hex digits)
' efUPlus = 4 ... u+XXXX u+XXXXXX (4 or 6 hex digits)
' efMarkup = 8 ... &#ddddddd; (1 to 7 decimal digits)
'Where:
' - prefixes \u is case insensitive
' - Xes are the digits of the codepoint in hexadecimal. (X = 0-9 or A-F/a-f)
'Example:
' - "abcd au+0062\U0063xy\u{64}", efAll returns "abcd abcxyd"
'Notes:
' - Avoid u+XXXX syntax if string contains literals without delimiters as it
' can be misinterpreted if adjacent to text starting with 0-9 or a-f.
' - This function also accepts all combinations of UnicodeEscapeFormats:
' E.g.:
'UnescapeUnicode("abcd au+0062\U0063xy\u{64}", efMarkup Or efRust)
' will return:
'"abcd au+0062\U0063xyd"
' - By default, this function will not invalidate UTF-16 strings if they are
' currently valid, but this can happen if `allowSingleSurrogates = True`
' E.g.: EscapeUnicode(ChrU(&HD801&, True)) returns "\uD801", but this string
' can no longer be un-escaped with UnescapeUnicode because "\uD801"
' represents a surrogate halve which is invalid unicode on its own.
' So UnescapeUnicode("\uD801") returns "\uD801" again, unless called with
' the optional parameter `allowSingleSurrogates = False` like this
' `UnescapeUnicode("\uD801", , True)`. This will return invalid UTF-16.
Public Function UnescapeUnicode(ByRef str As String, _
Optional ByVal escapeFormat As UnicodeEscapeFormat = efAll, _
Optional ByVal allowSingleSurrogates As Boolean = False) _
As String
If escapeFormat < [_efMin] Or escapeFormat > [_efMax] Then
Err.Raise 5, "EscapeUnicode", "Invalid escape format"
End If
Dim escapes() As EscapeSequence: escapes = NewEscapes()
Dim lb As Long: lb = LBound(escapes)
Dim ub As Long: ub = UBound(escapes)
Dim i As Long
For i = lb To ub 'Find first signature for each wanted format
With escapes(i)
If escapeFormat And .ueFormat Then
.buffPosition = InStr(1, str, .ueSignature, vbBinaryCompare)
.letSngSurrogate = allowSingleSurrogates
End If
End With
Next i
UnescapeUnicode = str 'Allocate buffer
Const posByte As Byte = &H80
Const buffSize As Long = 1024
Dim buffSignaturePos(1 To buffSize) As Byte
Dim buffFormat(1 To buffSize) As UnicodeEscapeFormat
Dim buffEscIndex(1 To buffSize) As Long
Dim posOffset As Long
Dim diff As Long
Dim highSur As Long
Dim lowSur As Long
Dim remainingLen As Long: remainingLen = Len(str)
Dim posChar As String: posChar = ChrB$(posByte)
Dim outPos As Long: outPos = 1
Dim inPos As Long: inPos = 1
Do
Dim upperLimit As Long: upperLimit = posOffset + buffSize
For i = lb To ub 'Find all signatures within buffer size
With escapes(i)
Do Until .buffPosition = 0 Or .buffPosition > upperLimit
.buffPosition = .buffPosition - posOffset
buffSignaturePos(.buffPosition) = posByte
buffFormat(.buffPosition) = .ueFormat
buffEscIndex(.buffPosition) = i
.buffPosition = .buffPosition + .sigSize + posOffset
.buffPosition = InStr(.buffPosition, str, .ueSignature)
Loop
End With
Next i
Dim temp As String: temp = buffSignaturePos
Dim nextPos As Long: nextPos = InStrB(1, temp, posChar)
Do Until nextPos = 0 'Unescape all found signatures from buffer
i = buffEscIndex(nextPos)
escapes(i).currPosition = nextPos + posOffset
Select Case buffFormat(nextPos)
Case efPython: TryPythonEscape escapes(i), str
Case efRust: TryRustEscape escapes(i), str
Case efUPlus: TryUPlusEscape escapes(i), str
Case efMarkup: TryMarkupEscape escapes(i), str
End Select
With escapes(i)
If .unEscSize > 0 Then
diff = .currPosition - inPos
If outPos > 1 Then
Mid$(UnescapeUnicode, outPos) = Mid$(str, inPos, diff)
End If
outPos = outPos + diff
If .unEscSize = 1 Then
Mid$(UnescapeUnicode, outPos) = ChrW$(.codepoint)
Else
.codepoint = .codepoint - &H10000
highSur = &HD800& Or (.codepoint \ &H400&)
lowSur = &HDC00& Or (.codepoint And &H3FF&)
Mid$(UnescapeUnicode, outPos) = ChrW$(highSur)
Mid$(UnescapeUnicode, outPos + 1) = ChrW$(lowSur)
End If
outPos = outPos + .unEscSize
inPos = .currPosition + .escSize
nextPos = nextPos + .escSize - .sigSize
End If
nextPos = InStrB(nextPos + .sigSize, temp, posChar)
End With
Loop
remainingLen = remainingLen - buffSize
posOffset = posOffset + buffSize
Erase buffSignaturePos
Loop Until remainingLen < 1
If outPos > 1 Then
diff = Len(str) - inPos + 1
If diff > 0 Then
Mid$(UnescapeUnicode, outPos, diff) = Mid$(str, inPos, diff)
End If
UnescapeUnicode = Left$(UnescapeUnicode, outPos + diff - 1)
End If
End Function
Private Function NewEscapes() As EscapeSequence()
Static escapes(0 To 6) As EscapeSequence
If escapes(0).ueFormat = [_efNone] Then
InitEscape escapes(0), efPython, "\U"
InitEscape escapes(1), efPython, "\u"
InitEscape escapes(2), efRust, "\U{"
InitEscape escapes(3), efRust, "\u{"
InitEscape escapes(4), efUPlus, "U+"
InitEscape escapes(5), efUPlus, "u+"
InitEscape escapes(6), efMarkup, "&#"
End If
NewEscapes = escapes
End Function
Private Sub InitEscape(ByRef escape As EscapeSequence, _
ByVal ueFormat As UnicodeEscapeFormat, _
ByRef ueSignature As String)
With escape
.ueFormat = ueFormat
.ueSignature = ueSignature
.sigSize = Len(ueSignature)
End With
End Sub
Private Sub TryPythonEscape(ByRef escape As EscapeSequence, ByRef str As String)
Const H As String = "[0-9A-Fa-f]"
Const PYTHON_ESCAPE_PATTERN_NOT_BMP = "00[01]" & H & H & H & H & H
Const PYTHON_ESCAPE_PATTERN_BMP As String = H & H & H & H & "*"
Dim potentialEscape As String
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 2, 8) 'Exclude leading \[Uu]
If potentialEscape Like PYTHON_ESCAPE_PATTERN_NOT_BMP Then
.escSize = 10 '\[Uu]00[01]HHHHH
.codepoint = CLng("&H" & potentialEscape) 'No extra Mid$ needed
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then
.unEscSize = 1
Exit Sub
End If
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
Exit Sub
End If
End If
If potentialEscape Like PYTHON_ESCAPE_PATTERN_BMP Then
.escSize = 6 '\[Uu]HHHH
.codepoint = CLng("&H" & Left$(potentialEscape, 4))
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
End If
End With
End Sub
Private Function IsValidBMP(ByVal codepoint As Long, _
ByVal letSingleSurrogate As Boolean) As Boolean
IsValidBMP = (codepoint < &HD800& Or codepoint >= &HE000& Or letSingleSurrogate)
End Function
Private Sub TryRustEscape(ByRef escape As EscapeSequence, ByRef str As String)
Static rustEscPattern(1 To 6) As String
Static isPatternInit As Boolean
Dim potentialEscape As String
Dim nextBrace As Long
If Not isPatternInit Then
Dim i As Long
rustEscPattern(1) = "[0-9A-Fa-f]}*"
For i = 2 To 6
rustEscPattern(i) = "[0-9A-Fa-f]" & rustEscPattern(i - 1)
Next i
isPatternInit = True
End If
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 3, 7) 'Exclude leading \[Uu]{
nextBrace = InStr(2, potentialEscape, "}", vbBinaryCompare)
If nextBrace = 0 Then Exit Sub
If Not potentialEscape Like rustEscPattern(nextBrace - 1) Then Exit Sub
.codepoint = CLng("&H" & Left$(potentialEscape, nextBrace - 1))
.escSize = nextBrace + 3
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
End If
End With
End Sub
Private Sub TryUPlusEscape(ByRef escape As EscapeSequence, _
ByRef str As String)
Const H As String = "[0-9A-Fa-f]"
Const UPLUS_ESCAPE_PATTERN_4_DIGITS = H & H & H & H & "*"
Const UPLUS_ESCAPE_PATTERN_5_DIGITS = H & H & H & H & H & "*"
Const UPLUS_ESCAPE_PATTERN_6_DIGITS = H & H & H & H & H & H
Dim potentialEscape As String
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 2, 6) 'Exclude leading [Uu]+
If potentialEscape Like UPLUS_ESCAPE_PATTERN_6_DIGITS Then
.escSize = 8
.codepoint = CLng("&H" & potentialEscape)
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then
.unEscSize = 1
Exit Sub
End If
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
Exit Sub
End If
End If
If potentialEscape Like UPLUS_ESCAPE_PATTERN_5_DIGITS Then
.escSize = 7
.codepoint = CLng("&H" & Left$(potentialEscape, 5))
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then
.unEscSize = 1
Exit Sub
End If
Else
.unEscSize = 2
Exit Sub
End If
End If
If potentialEscape Like UPLUS_ESCAPE_PATTERN_4_DIGITS Then
.escSize = 6
.codepoint = CLng("&H" & Left$(potentialEscape, 4))
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
End If
End With
End Sub
Private Sub TryMarkupEscape(ByRef escape As EscapeSequence, _
ByRef str As String)
Static mEscPattern(1 To 7) As String
Static isPatternInit As Boolean
Dim potentialEscape As String
Dim nextSemicolon As Long
If Not isPatternInit Then
Dim i As Long
For i = 1 To 6
mEscPattern(i) = String$(i, "#") & ";*"
Next i
mEscPattern(7) = "1######;"
isPatternInit = True
End If
With escape
.unEscSize = 0
potentialEscape = Mid$(str, .currPosition + 2, 8) 'Exclude leading &[#]
nextSemicolon = InStr(2, potentialEscape, ";", vbBinaryCompare)
If nextSemicolon = 0 Then Exit Sub
If Not potentialEscape Like mEscPattern(nextSemicolon - 1) Then Exit Sub
.codepoint = CLng(Left$(potentialEscape, nextSemicolon - 1))
.escSize = nextSemicolon + 2
If .codepoint < &H10000 Then
If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
ElseIf .codepoint < &H110000 Then
.unEscSize = 2
End If
End With
End Sub
'Returns the given unicode codepoint as standard VBA UTF-16LE string
Public Function ChrU(ByVal codepoint As Long, _
Optional ByVal allowSingleSurrogates As Boolean = False) As String
Const methodName As String = "ChrU"
Static st As TwoCharTemplate
Static lt As LongTemplate
If codepoint < &H8000 Then Err.Raise 5, methodName, "Codepoint < -32768"
If codepoint < 0 Then codepoint = codepoint And &HFFFF& 'Incase of uInt input
If codepoint < &HD800& Then
ChrU = ChrW$(codepoint)
ElseIf codepoint < &HE000& And Not allowSingleSurrogates Then
Err.Raise 5, methodName, "Range reserved for surrogate pairs"
ElseIf codepoint < &H10000 Then
ChrU = ChrW$(codepoint)
ElseIf codepoint < &H110000 Then
lt.l = (&HD800& Or (codepoint \ &H400& - &H40&)) _
Or (&HDC00 Or (codepoint And &H3FF&)) * &H10000 '&HDC00 with no &
LSet st = lt
ChrU = st.s
Else
Err.Raise 5, methodName, "Codepoint outside of valid Unicode range."
End If
End Function
'Returns a given characters unicode codepoint as long.
'Note: One unicode character can consist of two VBA "characters", a so-called
' "surrogate pair" (input string of length 2, so Len(char) = 2!)
Public Function AscU(ByRef char As String) As Long
AscU = AscW(char) And &HFFFF&
If Len(char) > 1 Then
Dim lo As Long: lo = AscW(Mid$(char, 2, 1)) And &HFFFF&
If &HDC00& > lo Or lo > &HDFFF& Then Exit Function
AscU = (AscU - &HD800&) * &H400& + (lo - &HDC00&) + &H10000
End If
End Function
'Function transcoding a VBA-native UTF-16LE encoded string to an ANSI string
'Note: Information will be lost for codepoints > 255!
Public Function EncodeANSI(ByRef utf16leStr As String) As String
Dim i As Long
Dim j As Long: j = 0
Dim utf16le() As Byte: utf16le = utf16leStr
Dim ansi() As Byte
ReDim ansi(1 To Len(utf16leStr))
For i = LBound(ansi) To UBound(ansi)
If utf16le(j + 1) = 0 Then
ansi(i) = utf16le(j)
j = j + 2
Else
ansi(i) = &H3F 'Chr(&H3F) = "?"
j = j + 2
End If
Next i
EncodeANSI = ansi
End Function
'Function transcoding an UTF-8 encoded string to the VBA-native UTF-16LE
'Function transcoding an VBA-native UTF-16LE encoded string to UTF-8
Public Function DecodeUTF8(ByRef utf8Str As String, _
Optional ByVal raiseErrors As Boolean = False) As String
Const methodName As String = "DecodeUTF8native"
Dim i As Long
Dim numBytesOfCodePoint As Byte
Static numBytesOfCodePoints(0 To 255) As Byte
Static mask(2 To 4) As Long
Static minCp(2 To 4) As Long
If numBytesOfCodePoints(0) = 0 Then
For i = &H0& To &H7F&: numBytesOfCodePoints(i) = 1: Next i '0xxxxxxx
'110xxxxx - C0 and C1 are invalid (overlong encoding)
For i = &HC2& To &HDF&: numBytesOfCodePoints(i) = 2: Next i
For i = &HE0& To &HEF&: numBytesOfCodePoints(i) = 3: Next i '1110xxxx
'11110xxx - 11110100, 11110101+ (= &HF5+) outside of valid Unicode range
For i = &HF0& To &HF4&: numBytesOfCodePoints(i) = 4: Next i
For i = 2 To 4: mask(i) = (2 ^ (7 - i) - 1): Next i
minCp(2) = &H80&: minCp(3) = &H800&: minCp(4) = &H10000
End If
Dim codepoint As Long
Dim currByte As Byte
Dim utf8() As Byte: utf8 = utf8Str
Dim utf16() As Byte: ReDim utf16(0 To (UBound(utf8) - LBound(utf8) + 1) * 2)
Dim j As Long: j = 0
Dim k As Long
i = LBound(utf8)
Do While i <= UBound(utf8)
codepoint = utf8(i)
numBytesOfCodePoint = numBytesOfCodePoints(codepoint)
If numBytesOfCodePoint = 0 Then
If raiseErrors Then Err.Raise 5, methodName, "Invalid byte"
GoTo insertErrChar
ElseIf numBytesOfCodePoint = 1 Then
utf16(j) = codepoint
j = j + 2
ElseIf i + numBytesOfCodePoint - 1 > UBound(utf8) Then
If raiseErrors Then Err.Raise 5, methodName, _
"Incomplete UTF-8 codepoint at end of string."
GoTo insertErrChar
Else
codepoint = utf8(i) And mask(numBytesOfCodePoint)
For k = 1 To numBytesOfCodePoint - 1
currByte = utf8(i + k)
If (currByte And &HC0&) = &H80& Then
codepoint = (codepoint * &H40&) + (currByte And &H3F)
Else
If raiseErrors Then _
Err.Raise 5, methodName, "Invalid continuation byte"
GoTo insertErrChar
End If
Next k
'Convert the Unicode codepoint to UTF-16LE bytes
If codepoint < minCp(numBytesOfCodePoint) Then
If raiseErrors Then Err.Raise 5, methodName, "Overlong encoding"
GoTo insertErrChar
ElseIf codepoint < &HD800& Then
utf16(j) = CByte(codepoint And &HFF&)
utf16(j + 1) = CByte(codepoint \ &H100&)
j = j + 2
ElseIf codepoint < &HE000& Then
If raiseErrors Then Err.Raise 5, methodName, _
"Invalid Unicode codepoint.(Range reserved for surrogate pairs)"
GoTo insertErrChar
ElseIf codepoint < &H10000 Then
If codepoint = &HFEFF& Then GoTo nextCp '(BOM - will be ignored)
utf16(j) = codepoint And &HFF&
utf16(j + 1) = codepoint \ &H100&
j = j + 2
ElseIf codepoint < &H110000 Then 'Calculate surrogate pair
Dim m As Long: m = codepoint - &H10000
Dim loSurrogate As Long: loSurrogate = &HDC00& Or (m And &H3FF)
Dim hiSurrogate As Long: hiSurrogate = &HD800& Or (m \ &H400&)
utf16(j) = hiSurrogate And &HFF&
utf16(j + 1) = hiSurrogate \ &H100&
utf16(j + 2) = loSurrogate And &HFF&
utf16(j + 3) = loSurrogate \ &H100&
j = j + 4
Else
If raiseErrors Then Err.Raise 5, methodName, _
"Codepoint outside of valid Unicode range"
insertErrChar: utf16(j) = &HFD
utf16(j + 1) = &HFF
j = j + 2
If numBytesOfCodePoint = 0 Then numBytesOfCodePoint = 1
End If
End If
nextCp: i = i + numBytesOfCodePoint 'Move to the next UTF-8 codepoint
Loop
DecodeUTF8 = MidB$(utf16, 1, j)
End Function
注意:
EncodeANSI
函数可能会像这样被“滥用”,因为转义字符串中的 UTF-8 字节将始终被解码为单字节 UTF-16 字符,因为根据定义,它们是单字节。这意味着 EncodeANSI
函数仅用于删除字符串中的每隔一个字节(由于 UTF-16 表示单字节字符的方式,这些字节都为空。)生成的字符串是您所输入的字符串的 UTF-8 表示形式。然后我们“解码”(转换为 UTF-16),因为这是 vb6 表示 Unicode 字符串的本机方式。
我也包含了
EscapeUnicode
函数,因此您可以看到字符串作为转义的 unicode 代码点实际上应该是什么样子:
actualEscapeSequence = EscapeUnicode(DecodeUTF8(EncodeANSI(UnescapeUnicode(StrResult))))
actualEscapeSequence
将等于“\u0633\u0644\u0627\u0645 \u062F\u0646\u06CC\u0627”,您可以在here确认它是正确的unicode转义序列。