我是一名经验丰富的 Python 开发人员,并且喜欢它的许多便利。我实际上了解 C# 一段时间了,但最近开始接触一些更高级的编码。
我想知道是否有一种方法可以将 C# 中的字节数组“解析”为一组(不同大小的)项目。
想象一下我们有这个:
Python:
import struct

# Ten little-endian bytes: a 4-byte unsigned long, a 2-byte unsigned short,
# then another 4-byte unsigned long ("<LHL").
byteArray = b"\xFF\xFF\x00\x00\x00\xFF\x01\x00\x00\x00"
numbers = struct.unpack("<LHL", byteArray)
print(numbers[0])  # 65535
print(numbers[1])  # 65280 (bytes 00 FF read little-endian are 0xFF00)
print(numbers[2])  # 1

newNumbers = [0, 255, 1023]
# struct.pack takes one positional argument per format field, so the list
# has to be unpacked with *; passing the list itself raises struct.error.
byteArray = struct.pack("<HHL", *newNumbers)
print(byteArray)  # b'\x00\x00\xff\x00\xff\x03\x00\x00' (Python 3 repr)
我想在 C# 中实现同样的效果,而不需要像这样使用大量、混乱的代码:
C#:
// NOTE: BitConverter uses the host's native byte order; the expected values
// below assume a little-endian machine.
byte[] byteArray = new byte[] { 255, 255, 0, 0, 0, 255, 1, 0, 0, 0 };
byte[] temp;
int[] values = new int[3];
// Bytes 0-3: 32-bit integer.
temp = new byte[4];
Array.Copy(byteArray, 0, temp, 0, 4);
values[0] = BitConverter.ToInt32(temp, 0);
// Bytes 4-5: unsigned 16-bit integer. ToUInt16 matches Python's "H";
// ToInt16 would interpret 00 FF as -256.
temp = new byte[2];
Array.Copy(byteArray, 4, temp, 0, 2);
values[1] = BitConverter.ToUInt16(temp, 0);
// Bytes 6-9: 32-bit integer. It starts right after the 2-byte field, at
// offset 6; offset 8 would read past the end of the 10-byte array.
temp = new byte[4];
Array.Copy(byteArray, 6, temp, 0, 4);
values[2] = BitConverter.ToInt32(temp, 0);
// Now values contains an array of integer values.
// It would be OK to assume a common maximum (e.g. Int64) and just cast up to that,
// but we still have to consider the size of the source bytes.
// Now the other way.
// A separate name is used because re-declaring "values" in the same scope does not compile.
int[] newValues = new int[] { 0, 255, 1023 };
byteArray = new byte[8];
// On a little-endian host the low-order bytes come first in GetBytes' output,
// so the 16-bit fields are taken from index 0 (index 2 holds the high half).
temp = BitConverter.GetBytes(newValues[0]);
Array.Copy(temp, 0, byteArray, 0, 2);
temp = BitConverter.GetBytes(newValues[1]);
Array.Copy(temp, 0, byteArray, 2, 2);
temp = BitConverter.GetBytes(newValues[2]);
Array.Copy(temp, 0, byteArray, 4, 4);
显然,我拥有的 C# 代码是非常特定的,并且无论如何都不能真正重用。
建议?
我最终编写了自己的类来处理这个问题。它相当复杂,但似乎确实有效。它也不完整,但它可以满足我目前的需要。欢迎使用,如果有什么好的改进请告诉我。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics;
// This is a crude implementation of a format string based struct converter for C#.
// This is probably not the best implementation, the fastest implementation, the most bug-proof implementation, or even the most functional implementation.
// It's provided as-is for free. Enjoy.
/// <summary>
/// Converts between byte arrays and object arrays using a subset of the format
/// strings understood by Python's <c>struct</c> module.
/// </summary>
/// <remarks>
/// Supported format characters: <c>b</c>/<c>B</c> (1 byte), <c>h</c>/<c>H</c> (2 bytes),
/// <c>i</c>/<c>I</c> and <c>l</c>/<c>L</c> (4 bytes), <c>q</c>/<c>Q</c> (8 bytes) and
/// <c>x</c> (one skipped pad byte, <see cref="Unpack"/> only). An optional leading
/// <c>&lt;</c> (little endian) or <c>&gt;</c> (big endian) selects the byte order;
/// without a marker the host's native byte order is used.
/// </remarks>
public class StructConverter
{
    // Type-agnostic front end for BitConverter.GetBytes so callers can stay generic.
    private static byte[] TypeAgnosticGetBytes(object o)
    {
        if (o is int) return BitConverter.GetBytes((int)o);
        if (o is uint) return BitConverter.GetBytes((uint)o);
        if (o is long) return BitConverter.GetBytes((long)o);
        if (o is ulong) return BitConverter.GetBytes((ulong)o);
        if (o is short) return BitConverter.GetBytes((short)o);
        if (o is ushort) return BitConverter.GetBytes((ushort)o);
        if (o is byte) return new byte[] { (byte)o };
        // A boxed sbyte can only be unboxed as sbyte; unboxing it straight to byte
        // throws InvalidCastException. Unbox first, then reinterpret the bits.
        if (o is sbyte) return new byte[] { unchecked((byte)(sbyte)o) };
        throw new ArgumentException("Unsupported object type found");
    }

    // Maps a supported runtime type to the format character Unpack expects for it.
    private static string GetFormatSpecifierFor(object o)
    {
        if (o is int) return "i";
        if (o is uint) return "I";
        if (o is long) return "q";
        if (o is ulong) return "Q";
        if (o is short) return "h";
        if (o is ushort) return "H";
        if (o is byte) return "B";
        if (o is sbyte) return "b";
        throw new ArgumentException("Unsupported object type found");
    }

    // Size in bytes of the field described by a format character.
    // Single source of truth shared by the length check and the decode loop.
    private static int GetFieldSize(char c)
    {
        switch (c)
        {
            case 'q':
            case 'Q':
                return 8;
            case 'i':
            case 'I':
            case 'l':
            case 'L':
                return 4;
            case 'h':
            case 'H':
                return 2;
            case 'b':
            case 'B':
            case 'x':
                return 1;
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    // Decodes one field whose bytes are already in the host's native byte order.
    private static object DecodeField(char c, byte[] field)
    {
        switch (c)
        {
            case 'q': return BitConverter.ToInt64(field, 0);
            case 'Q': return BitConverter.ToUInt64(field, 0);
            case 'i':
            case 'l': return BitConverter.ToInt32(field, 0);
            case 'I':
            case 'L': return BitConverter.ToUInt32(field, 0);
            case 'h': return BitConverter.ToInt16(field, 0);
            case 'H': return BitConverter.ToUInt16(field, 0);
            case 'b': return unchecked((sbyte)field[0]);
            case 'B': return field[0];
            default:
                throw new ArgumentException("Invalid character found in format string.");
        }
    }

    /// <summary>
    /// Convert a byte array into an array of objects based on Python's "struct.unpack" protocol.
    /// </summary>
    /// <param name="fmt">A "struct.pack"-compatible format string</param>
    /// <param name="bytes">An array of bytes to convert to objects</param>
    /// <returns>Array of objects, one per non-padding format character, in format order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fmt"/> or <paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The format string is empty, contains an unsupported character, or describes a
    /// different number of bytes than <paramref name="bytes"/> contains.
    /// </exception>
    /// <remarks>You are responsible for casting the objects in the array back to their proper types.</remarks>
    public static object[] Unpack(string fmt, byte[] bytes)
    {
        if (fmt == null) throw new ArgumentNullException("fmt");
        if (bytes == null) throw new ArgumentNullException("bytes");
        if (fmt.Length < 1) throw new ArgumentException("Format string cannot be empty.");

        Debug.WriteLine("Format string is length {0}, {1} bytes provided.", fmt.Length, bytes.Length);

        // A flip is needed only when the requested byte order differs from the host's.
        bool endianFlip = false;
        if (fmt[0] == '<')
        {
            endianFlip = !BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }
        else if (fmt[0] == '>')
        {
            endianFlip = BitConverter.IsLittleEndian;
            fmt = fmt.Substring(1);
        }

        // Validate the whole format string and work out how many bytes it describes.
        int totalByteLength = 0;
        foreach (char c in fmt)
        {
            totalByteLength += GetFieldSize(c);
        }

        Debug.WriteLine("Endianness will {0}be flipped.", (object)(endianFlip ? "" : "NOT "));
        Debug.WriteLine("The byte array is expected to be {0} bytes long.", totalByteLength);

        if (bytes.Length != totalByteLength) throw new ArgumentException("The number of bytes provided does not match the total length of the format string.");

        int byteArrayPosition = 0;
        List<object> outputList = new List<object>();
        foreach (char c in fmt)
        {
            int size = GetFieldSize(c);
            if (c != 'x')
            {
                // Work on a private copy so reversing never touches the caller's array.
                byte[] field = new byte[size];
                Array.Copy(bytes, byteArrayPosition, field, 0, size);
                if (endianFlip) Array.Reverse(field);
                outputList.Add(DecodeField(c, field));
            }
            byteArrayPosition += size;
        }
        return outputList.ToArray();
    }

    /// <summary>
    /// Convert an array of objects to a byte array, along with a string that can be used with Unpack.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <param name="LittleEndian">Set to False if you want to use big endian output.</param>
    /// <param name="NeededFormatStringToRecover">Variable to place an 'Unpack'-compatible format string into.</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    /// <exception cref="ArgumentException">An item has a type that is not supported.</exception>
    public static byte[] Pack(object[] items, bool LittleEndian, out string NeededFormatStringToRecover)
    {
        if (items == null) throw new ArgumentNullException("items");

        List<byte> outputBytes = new List<byte>();
        // Bytes need reversing only when the requested order differs from the host's.
        bool endianFlip = (LittleEndian != BitConverter.IsLittleEndian);
        string outString = (LittleEndian ? "<" : ">");

        foreach (object o in items)
        {
            byte[] theseBytes = TypeAgnosticGetBytes(o);
            // Array.Reverse works in place. Enumerable.Reverse returns a lazy sequence
            // that cannot be cast to byte[], so the cast would throw at run time.
            if (endianFlip) Array.Reverse(theseBytes);
            outString += GetFormatSpecifierFor(o);
            outputBytes.AddRange(theseBytes);
        }

        NeededFormatStringToRecover = outString;
        return outputBytes.ToArray();
    }

    /// <summary>
    /// Convert an array of objects to a little-endian byte array, discarding the format string.
    /// </summary>
    /// <param name="items">An object array of items to convert</param>
    /// <returns>A Byte array containing the objects provided in binary format.</returns>
    public static byte[] Pack(object[] items)
    {
        string dummy;
        return Pack(items, true, out dummy);
    }
}
BinaryWriter 和 BinaryReader 可以把任意类型的值写入字节数组,或者从字节数组中读取出来:
// Back the writer with an in-memory stream so the written bytes can be
// retrieved as an array afterwards.
var str = new MemoryStream();
var bw = new BinaryWriter(str);
// The Write overload is chosen by the argument's static type: the literal 42
// is an int, so the int overload is used here.
bw.Write(42);
// NOTE(review): BinaryWriter writes strings with a length prefix - confirm the
// exact layout in the BinaryWriter.Write(string) docs before interoperating
// with another format.
bw.Write("hello");
...
// Copy everything written to the stream into a new byte[].
var bytes = str.ToArray();
.NET(因此也包括 C#)提供了 `Marshal.StructureToPtr` 和 `Marshal.PtrToStructure` 方法。
您可以滥用它们把原始内存直接转换为 `struct`,就像在 C 中一样,但我不建议这样做(因为它并不完全可移植)。您还需要先把 `Byte[]` 数组缓冲区复制到本机堆中,才能对其执行该操作:
/// <summary>
/// Reinterprets the leading bytes of <paramref name="buffer"/> as a structure of type
/// <typeparamref name="T"/> by copying them to unmanaged memory and marshalling them back.
/// </summary>
/// <typeparam name="T">Value type to read; its marshalled layout determines how many bytes are consumed.</typeparam>
/// <param name="buffer">Source bytes; must contain at least <c>Marshal.SizeOf&lt;T&gt;()</c> bytes.</param>
/// <returns>The structure built from the first <c>Marshal.SizeOf&lt;T&gt;()</c> bytes of the buffer.</returns>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="buffer"/> is shorter than the structure.</exception>
public static T FromBuffer<T>( Byte[] buffer )
    where T : struct // "struct" already implies a parameterless constructor; adding new() is error CS0451
{
    if ( buffer == null ) throw new ArgumentNullException( nameof( buffer ) );

    Int32 size = Marshal.SizeOf<T>();
    // Check before allocating so a short buffer fails with a clear message.
    if ( buffer.Length < size )
    {
        throw new ArgumentOutOfRangeException( nameof( buffer ), "Buffer is smaller than the marshalled size of the target structure." );
    }

    IntPtr ptr = Marshal.AllocHGlobal( size );
    try
    {
        Marshal.Copy( buffer, 0, ptr, size );
        // The generic overload avoids boxing the result and the cast back to T.
        return Marshal.PtrToStructure<T>( ptr );
    }
    finally
    {
        // Always release the unmanaged block, even if marshalling throws.
        Marshal.FreeHGlobal( ptr );
    }
}
如果要我推荐一种方案,我会推荐我自己编写的这个:与其他方案相比,它的用法更接近 Python。当然,借助 Python 的动态类型,实现起来要容易得多。
它还缺少浮点格式(不过我想它们应该很容易添加?)。
using System.Runtime.CompilerServices;
namespace TestApp;
/// <summary>
/// Packs and unpacks primitive values to and from byte arrays using a subset of the
/// format strings of Python's <c>struct</c> module.
/// </summary>
/// <remarks>
/// Supported format characters: <c>b</c>/<c>B</c>, <c>h</c>/<c>H</c>, <c>i</c>/<c>I</c>,
/// <c>q</c>/<c>Q</c>, <c>?</c> (bool) and <c>x</c> (one pad byte). <c>&lt;</c> and <c>&gt;</c>
/// may appear anywhere in the string and switch the byte order for the fields that follow;
/// the default is little endian.
/// </remarks>
public static class StructPacker
{
    /// <summary>
    /// Packs the values according to the provided format
    /// </summary>
    /// <param name="format">Format matching Python's struct.pack: https://docs.python.org/3/library/struct.html</param>
    /// <param name="values">Values to pack; each one is converted to the type its format character demands</param>
    /// <returns>Byte array containing packed values</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="format"/> is null</exception>
    /// <exception cref="InvalidOperationException">Thrown when values array doesn't have enough entries to match the format, or the format contains an unknown character</exception>
    public static byte[] Pack(string format, params object[] values)
    {
        ArgumentNullException.ThrowIfNull(format);
        // An explicit null argument is treated like an empty value list.
        values ??= Array.Empty<object>();

        var builder = new BinaryArrayBuilder();
        var littleEndian = true;
        var valueCtr = 0;
        foreach (var ch in format)
        {
            switch (ch)
            {
                case '<':
                    littleEndian = true;
                    continue;
                case '>':
                    littleEndian = false;
                    continue;
                case 'x':
                    builder.AppendByte(0x00);
                    continue;
            }

            if (valueCtr >= values.Length)
                throw new InvalidOperationException("Provided too little values for given format string");

            var (formatType, _) = GetFormatType(ch);
            // Convert.ChangeType throws OverflowException if the value does not fit the field type.
            var value = Convert.ChangeType(values[valueCtr], formatType);
            var bytes = TypeAgnosticGetBytes(value);
            // Array.Reverse works in place. Enumerable.Reverse returns a lazy sequence
            // that cannot be cast to byte[], so the cast would throw at run time.
            if (littleEndian != BitConverter.IsLittleEndian)
                Array.Reverse(bytes);
            builder.AppendBytes(bytes);
            valueCtr++;
        }
        return builder.ToArray();
    }

    /// <summary>
    /// Unpacks data from byte array to tuple according to format provided
    /// </summary>
    /// <typeparam name="T">Tuple type to return values in</typeparam>
    /// <param name="format">Format matching Python's struct.unpack: https://docs.python.org/3/library/struct.html</param>
    /// <param name="data">Bytes that should contain your values</param>
    /// <returns>Tuple containing unpacked values</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="format"/> or <paramref name="data"/> is null</exception>
    /// <exception cref="InvalidOperationException">Thrown when the tuple's element count does not match the number of values in the format, or the format contains an unknown character</exception>
    public static T Unpack<T>(string format, byte[] data)
        where T : ITuple
    {
        ArgumentNullException.ThrowIfNull(format);
        ArgumentNullException.ThrowIfNull(data);

        var elementTypes = typeof(T).GenericTypeArguments;
        var resultingValues = new List<object>(elementTypes.Length);
        var littleEndian = true;
        var dataIx = 0;
        foreach (var ch in format)
        {
            switch (ch)
            {
                case '<':
                    littleEndian = true;
                    continue;
                case '>':
                    littleEndian = false;
                    continue;
                case 'x':
                    dataIx++;
                    continue;
            }

            if (resultingValues.Count >= elementTypes.Length)
                throw new InvalidOperationException("Provided too little tuple arguments for given format string");

            var (formatType, formatSize) = GetFormatType(ch);
            // The range indexer returns a fresh copy, so reversing it leaves the caller's array untouched.
            var valueBytes = data[dataIx..(dataIx + formatSize)];
            if (littleEndian != BitConverter.IsLittleEndian)
                Array.Reverse(valueBytes);
            var value = TypeAgnosticGetValue(formatType, valueBytes);
            // Convert from the wire type to the element type the caller asked for.
            resultingValues.Add(Convert.ChangeType(value, elementTypes[resultingValues.Count]));
            dataIx += formatSize;
        }

        if (resultingValues.Count != elementTypes.Length)
            throw new InvalidOperationException("Mismatch between generic argument count and pack format");

        // Tuple types expose a constructor taking one argument per element type.
        var constructor = typeof(T).GetConstructor(elementTypes);
        return (T)constructor!.Invoke(resultingValues.ToArray());
    }

    /// <summary>
    /// Used to unpack single value from byte array. Shorthand to not have to declare and deconstruct tuple in your code
    /// </summary>
    /// <typeparam name="TValue">Type of value you need</typeparam>
    /// <param name="format">Format string describing exactly one value (pad bytes and byte-order markers are allowed)</param>
    /// <param name="data">Bytes that should contain your values</param>
    /// <returns>Value unpacked from data</returns>
    /// <exception cref="InvalidOperationException">Thrown when the format does not describe exactly one value</exception>
    public static TValue UnpackSingle<TValue>(string format, byte[] data)
    {
        // C# has no syntax for a one-element tuple, but ValueTuple<TValue> can be named directly.
        return Unpack<ValueTuple<TValue>>(format, data).Item1;
    }

    // Maps a format character to the CLR type it is read/written as and its size in bytes.
    private static (Type type, int size) GetFormatType(char formatChar)
    {
        return formatChar switch
        {
            'i' => (typeof(int), sizeof(int)),
            'I' => (typeof(uint), sizeof(uint)),
            'q' => (typeof(long), sizeof(long)),
            'Q' => (typeof(ulong), sizeof(ulong)),
            'h' => (typeof(short), sizeof(short)),
            'H' => (typeof(ushort), sizeof(ushort)),
            'b' => (typeof(sbyte), sizeof(sbyte)),
            'B' => (typeof(byte), sizeof(byte)),
            '?' => (typeof(bool), 1),
            _ => throw new InvalidOperationException("Unknown format char"),
        };
    }

    // Type-agnostic front end for BitConverter.GetBytes; the result is in host byte order.
    private static byte[] TypeAgnosticGetBytes(object o)
    {
        return o switch
        {
            bool v => new[] { v ? (byte)0x01 : (byte)0x00 },
            int v => BitConverter.GetBytes(v),
            uint v => BitConverter.GetBytes(v),
            long v => BitConverter.GetBytes(v),
            ulong v => BitConverter.GetBytes(v),
            short v => BitConverter.GetBytes(v),
            ushort v => BitConverter.GetBytes(v),
            byte v => new[] { v },
            // A boxed sbyte cannot be unboxed directly as byte; match it as sbyte and reinterpret the bits.
            sbyte v => new[] { unchecked((byte)v) },
            _ => throw new ArgumentException("Unsupported object type found"),
        };
    }

    // Inverse of TypeAgnosticGetBytes: decodes host-order bytes as the requested type.
    private static object TypeAgnosticGetValue(Type type, byte[] data)
    {
        if (type == typeof(bool)) return data[0] > 0;
        if (type == typeof(int)) return BitConverter.ToInt32(data, 0);
        if (type == typeof(uint)) return BitConverter.ToUInt32(data, 0);
        if (type == typeof(long)) return BitConverter.ToInt64(data, 0);
        if (type == typeof(ulong)) return BitConverter.ToUInt64(data, 0);
        if (type == typeof(short)) return BitConverter.ToInt16(data, 0);
        if (type == typeof(ushort)) return BitConverter.ToUInt16(data, 0);
        if (type == typeof(byte)) return data[0];
        if (type == typeof(sbyte)) return unchecked((sbyte)data[0]);
        throw new ArgumentException("Unsupported object type found");
    }
}
您还需要我使用的另一个类 `BinaryArrayBuilder`(或者将代码改为直接使用 MemoryStream)。其思路是通过包装 MemoryStream 来模仿 StringBuilder,因此用起来很方便,并且比 `List<byte>` 更高效:
namespace TestApp;
/// <summary>
/// Append-only builder for byte arrays, modelled on StringBuilder and backed by a MemoryStream.
/// </summary>
public class BinaryArrayBuilder
{
    private readonly MemoryStream _innerStream;

    /// <summary>Creates an empty builder.</summary>
    public BinaryArrayBuilder()
    {
        _innerStream = new MemoryStream();
    }

    /// <summary>Creates a builder whose content starts with a copy of <paramref name="initialBuffer"/>.</summary>
    /// <param name="initialBuffer">Bytes to place at the start of the builder.</param>
    /// <exception cref="ArgumentNullException"><paramref name="initialBuffer"/> is null.</exception>
    public BinaryArrayBuilder(byte[] initialBuffer)
    {
        ArgumentNullException.ThrowIfNull(initialBuffer);
        // new MemoryStream(byte[]) wraps the array as a fixed-size stream positioned at 0,
        // so appends would overwrite the initial bytes and then fail once the array is full.
        // Create an expandable stream and write the bytes into it so the position ends after them.
        _innerStream = new MemoryStream(initialBuffer.Length);
        _innerStream.Write(initialBuffer, 0, initialBuffer.Length);
    }

    /// <summary>Appends a single byte.</summary>
    /// <param name="value">Byte to append.</param>
    public void AppendByte(byte value)
    {
        _innerStream.WriteByte(value);
    }

    /// <summary>Appends all bytes of <paramref name="values"/>.</summary>
    /// <param name="values">Bytes to append.</param>
    public void AppendBytes(byte[] values)
    {
        _innerStream.Write(values);
    }

    /// <summary>Packs <paramref name="values"/> with StructPacker.Pack and appends the result.</summary>
    /// <param name="format">Format string passed through to StructPacker.Pack.</param>
    /// <param name="values">Values passed through to StructPacker.Pack.</param>
    public void AppendValues(string format, params object[] values)
    {
        AppendBytes(StructPacker.Pack(format, values));
    }

    /// <summary>Returns a copy of everything appended so far.</summary>
    public byte[] ToArray() => _innerStream.ToArray();
}
它的用法与 Python 非常接近——至少是在静态类型的 C# 所能做到的范围内:
// Pack two bytes and a 32-bit int, little endian.
StructPacker.Pack("<BBi", 0x83, 123, 12345678);
// Unpack into a tuple and deconstruct it in one step.
var (pid, dataLength) = StructPacker.Unpack<(int, byte)>(">IB", readBytes);
// Or keep the tuple and use named elements.
var anotherUnpack = StructPacker.Unpack<(int pid, byte length)>(">IB", readBytes);
Console.WriteLine($"PID: {anotherUnpack.pid}, Len: {anotherUnpack.length}");
// A bare int is not a tuple, so Unpack<int> violates the ITuple constraint;
// UnpackSingle is the single-value entry point.
var singleInt = StructPacker.UnpackSingle<int>("<i", data);
顺便说一句,我正在使用 .NET 7 和最新的语言功能,所以如果它不能直接为您编译,您可能需要一些调整;)
遗憾的是,此类功能并未通过 BitConverter 类内置到 .NET 中。
举例来说,Python 中的 struct.pack("H", 739) 大致相当于 C# 中的 BitConverter.GetBytes((ushort)739);Python 中的 struct.unpack("H", data) 大致相当于 C# 中的 BitConverter.ToUInt16(data, 0)。