我正在研究 https://github.com/jlaine/node-jpickle,想用它在 JavaScript 中遍历 pickle 对象,这样就可以通过 Ajax 把二进制对象从 Python 发送到 JavaScript。不幸的是,它依赖 Node.js 的 Buffer 对象来遍历数据,而我从 Ajax blob 得到的是一个 ArrayBuffer。问题不大:网上有好几处都给出了下面这段代码,把 jpickle 改成使用它应该很简单:
// Cursor that reads typed values sequentially from an ArrayBuffer through a DataView.
//
// NOTE(review): as posted, this snippet does not parse. The outer `function() {`
// wrapper is closed by `});`, and that `)` has no matching `(`. It looks like an
// immediately-invoked wrapper `(function () { ... })()` was intended -- confirm.
// If only the stray `)` is removed, the outer variable holds the (never invoked)
// wrapper function; `new` on it returns the inner constructor function itself,
// whose methods live on its `prototype`, so `cursor.next` is not a function.
var ArrayBufferCursor = function() {
// Inner constructor: wraps `arrayBuffer` in a DataView and starts reading at byte 0.
var ArrayBufferCursor = function(arrayBuffer) {
this.dataview = new DataView(arrayBuffer, 0);
this.size = arrayBuffer.byteLength;
this.index = 0;
}
// Read one value of the named `type` at the current position, advance the
// position by the value's byte width, and return the value.
// Throws Error("Unknown datatype") for an unrecognised `type`.
ArrayBufferCursor.prototype.next = function(type) {
switch(type) {
case 'Uint8':
var result = this.dataview.getUint8(this.index);
this.index += 1;
return result;
case 'Int16':
// The `true` second argument of the DataView getters requests little-endian byte order.
var result = this.dataview.getInt16(this.index, true);
this.index += 2;
return result;
case 'Uint16':
var result = this.dataview.getUint16(this.index, true);
this.index += 2;
return result;
case 'Int32':
var result = this.dataview.getInt32(this.index, true);
this.index += 4;
return result;
case 'Uint32':
var result = this.dataview.getUint32(this.index, true);
this.index += 4;
return result;
// 'Float' is an alias for 'Float32' (the case falls through).
case 'Float':
case 'Float32':
var result = this.dataview.getFloat32(this.index, true);
this.index += 4;
return result;
// 'Double' is an alias for 'Float64' (the case falls through).
case 'Double':
case 'Float64':
var result = this.dataview.getFloat64(this.index, true);
this.index += 8;
return result;
default:
throw new Error("Unknown datatype");
}
};
// True while the read position is still before the end of the buffer.
ArrayBufferCursor.prototype.hasNext = function() {
return this.index < this.size;
}
return ArrayBufferCursor;
// NOTE(review): the `)` below is the unmatched parenthesis described at the top.
});
只不过这段代码里至少有一处笔误。比如,也许我应该删掉最后那个右括号?删掉之后,Chrome 浏览器就能顺利解析这段代码,照理说我就可以这样使用它了:
// Read the first 32-bit float from the start of the buffer.
// Both names are declared explicitly: assigning to an undeclared `firstfloat`
// would create an implicit global and throws in strict mode / ES modules.
const cursor = new ArrayBufferCursor(arrayBuffer);
const firstfloat = cursor.next('Float');
除了 Chrome 抱怨
cursor.next
不是函数。哎呀。我怀疑这段代码中的一个小错字导致它无法正常工作,但我无法解决。有任何想法吗?谢谢
这个思路看起来不错。除了修正那处令人困惑的括号之外,要解析 pickle,还必须按大端序(Big Endian)而不是小端序(Little Endian)读取浮点数,这一点让我困惑了好一阵。
有了下面这些游标函数,现在就可以遍历 Pickle 对象了!!
/**
 * Sequential reader over an ArrayBuffer.
 *
 * Keeps a byte position (`index`) and exposes helpers that read a value at that
 * position and advance past it.
 *
 * @param {ArrayBuffer} arrayBuffer - the bytes to read.
 */
let ArrayBufferCursor = function (arrayBuffer) {
  this.dataview = new DataView(arrayBuffer, 0);
  this.size = arrayBuffer.byteLength;
  this.index = 0;
};

/**
 * Convert raw bytes to a string holding one UTF-16 code unit per byte.
 *
 * The conversion is chunked because passing a very large array as call
 * arguments (`String.fromCharCode(...bytes)`) exceeds the engine's argument
 * limit and throws a RangeError.
 *
 * @param {Uint8Array} bytes - bytes to convert.
 * @returns {string} string whose char codes equal the input bytes.
 */
ArrayBufferCursor.bytesToString = function (bytes) {
  const CHUNK_SIZE = 0x8000;
  const parts = [];
  for (let start = 0; start < bytes.length; start += CHUNK_SIZE) {
    parts.push(String.fromCharCode.apply(null, bytes.subarray(start, start + CHUNK_SIZE)));
  }
  return parts.join('');
};

/**
 * Read one value of the given type at the current position and advance past it.
 *
 * Integers are read little-endian and floats big-endian.
 *
 * @param {string} type - one of 'Char8', 'Uint8', 'Int16', 'Uint16', 'Int32',
 *   'Uint32', 'Float'/'Float32', 'Double'/'Float64'.
 * @returns {number|string} the decoded value ('Char8' yields a 1-character string).
 * @throws {Error} "Unknown datatype" for an unrecognised type.
 * @throws {RangeError} from DataView when the read would run past the buffer.
 */
ArrayBufferCursor.prototype.next = function (type) {
  const view = this.dataview;
  const at = this.index;
  let value;
  let width;
  switch (type) {
    case 'Char8':
      value = String.fromCharCode(view.getUint8(at));
      width = 1;
      break;
    case 'Uint8':
      value = view.getUint8(at);
      width = 1;
      break;
    case 'Int16':
      value = view.getInt16(at, true);
      width = 2;
      break;
    case 'Uint16':
      value = view.getUint16(at, true);
      width = 2;
      break;
    case 'Int32':
      value = view.getInt32(at, true);
      width = 4;
      break;
    case 'Uint32':
      value = view.getUint32(at, true);
      width = 4;
      break;
    case 'Float':
    case 'Float32':
      value = view.getFloat32(at, false);
      width = 4;
      break;
    case 'Double':
    case 'Float64':
      value = view.getFloat64(at, false);
      width = 8;
      break;
    default:
      throw new Error("Unknown datatype");
  }
  // Only advance once the read has succeeded, so a failed read leaves the cursor untouched.
  this.index = at + width;
  return value;
};

/**
 * Read the next `n` bytes as a string (one character per byte) and advance past them.
 *
 * Called with no argument (for example through implicit string coercion) it
 * reads nothing and returns '' so that the cursor position is not corrupted.
 *
 * @param {number} [n] - number of bytes to read.
 * @returns {string} the bytes as a string.
 * @throws {RangeError} when fewer than `n` bytes remain.
 */
ArrayBufferCursor.prototype.toString = function (n) {
  if (n === undefined) {
    return '';
  }
  const end = this.index + n;
  if (end > this.size) {
    throw new RangeError('Cannot read ' + n + ' bytes at offset ' + this.index + ' of ' + this.size);
  }
  // A typed-array view and its subarray share the underlying buffer; no bytes are copied here.
  const bytes = new Uint8Array(this.dataview.buffer).subarray(this.index, end);
  const text = ArrayBufferCursor.bytesToString(bytes);
  this.index = end;
  return text;
};

/**
 * Read up to (not including) the next '\n' byte, and advance past the '\n'.
 *
 * @returns {string} the line contents, one character per byte.
 * @throws {Error} when no '\n' remains, including when already at the end.
 */
ArrayBufferCursor.prototype.readLine = function () {
  const bytes = new Uint8Array(this.dataview.buffer);
  // Searches every remaining byte, including the very last one.
  const newlineAt = bytes.indexOf(10, this.index);
  if (newlineAt === -1) {
    throw new Error("Could not find end of a line in pickle?!");
  }
  const line = ArrayBufferCursor.bytesToString(bytes.subarray(this.index, newlineAt));
  this.index = newlineAt + 1; // skip the '\n'
  return line;
};

/**
 * @returns {boolean} true while unread bytes remain.
 */
ArrayBufferCursor.prototype.hasNext = function () {
  return this.index < this.size;
};
如果还不清楚的话:解决方案的其余部分基本上照搬自之前发布的 Node.js 版本,但效果非常好。Python 端用 Pickle->Zlib,JavaScript 端用 Pako->Jpickle,就构成了一套完整的机制,可以通过 Ajax 传输 Python 对象,而无需经过 JSON 和字符串转换。后续还要做的工作包括为 numpy 数组添加一个“假的”模拟器……
/**
 * Stack-machine reader for a subset of Python pickle opcodes (protocols 0-3).
 *
 * State kept on the instance:
 *   mark  - sentinel value pushed by the MARK opcode,
 *   memo  - values stored by PUT/BINPUT/LONG_BINPUT, keyed by index,
 *   stack - the working stack.
 */
var Parser = function () {
  this.mark = 'THIS-NEEDS-TO-BE-UNIQUE-TO-SERVE-AS-A-BOUNDARY';
  this.memo = {};
  this.stack = [];
};

/**
 * Decode a pickle and return the value left on top of the stack at STOP.
 *
 * Python lists and tuples both become arrays, dicts become plain objects and
 * None becomes null. LONG1/LONG4 values are decoded into a Number, so integers
 * beyond Number.MAX_SAFE_INTEGER lose precision.
 *
 * NOTE(review): GLOBAL looks callables up in an `emulated` table that is not
 * defined in this block; it must be provided by the surrounding code. REDUCE
 * then calls whatever GLOBAL pushed, so only feed this pickles from a trusted
 * source, or keep `emulated` restricted to harmless constructors.
 *
 * @param {ArrayBuffer} pickle - the pickled bytes.
 * @returns {*} the decoded value, or undefined when the data ends without STOP.
 * @throws {Error} on an unsupported protocol, an unknown opcode, an unknown
 *   global, or a malformed quoted string.
 */
Parser.prototype.load = function (pickle) {
  const OP = {
    MARK: '(',             // push special markobject on stack
    STOP: '.',             // every pickle ends with STOP
    POP: '0',              // discard topmost stack item
    POP_MARK: '1',         // discard stack top through topmost markobject
    DUP: '2',              // duplicate top stack item
    FLOAT: 'F',            // push float object; decimal string argument
    INT: 'I',              // push integer or bool; decimal string argument
    BININT: 'J',           // push 4-byte signed int
    BININT1: 'K',          // push 1-byte unsigned int
    LONG: 'L',             // push long; decimal string argument
    BININT2: 'M',          // push 2-byte unsigned int
    NONE: 'N',             // push None
    // missing PERSID
    // missing BINPERSID
    REDUCE: 'R',           // apply callable to argtuple, both on stack
    STRING: 'S',           // push string; NL-terminated string argument
    BINSTRING: 'T',        // push string; counted binary string argument
    SHORT_BINSTRING: 'U',  // same, for strings shorter than 256 bytes
    UNICODE: 'V',          // push Unicode string; raw-unicode-escaped argument
    BINUNICODE: 'X',       // push Unicode string; counted UTF-8 string argument
    APPEND: 'a',           // append stack top to list below it
    BUILD: 'b',            // build the entire value
    GLOBAL: 'c',           // push self.find_class(modname, name); 2 string args
    DICT: 'd',             // build a dict from stack items
    EMPTY_DICT: '}',       // push empty dict
    APPENDS: 'e',          // extend list on stack by topmost stack slice
    GET: 'g',              // push item from memo on stack; index is string arg
    BINGET: 'h',           // same; index is 1-byte arg
    // missing INST
    LONG_BINGET: 'j',      // push item from memo on stack; index is 4-byte arg
    LIST: 'l',             // build list from topmost stack items
    EMPTY_LIST: ']',       // push empty list
    OBJ: 'o',              // build a class instance using the objects between here and the mark
    PUT: 'p',              // store stack top in memo; index is string arg
    BINPUT: 'q',           // same; index is 1-byte arg
    LONG_BINPUT: 'r',      // same; index is 4-byte arg
    SETITEM: 's',          // add key+value pair to dict
    TUPLE: 't',            // build tuple from topmost stack items
    EMPTY_TUPLE: ')',      // push empty tuple
    SETITEMS: 'u',         // modify dict by adding topmost key+value pairs
    BINFLOAT: 'G',         // push float; arg is 8-byte float encoding
    // protocol 2
    PROTO: '\x80',         // identify pickle protocol
    NEWOBJ: '\x81',        // build object by applying cls.__new__ to argtuple (not handled below)
    TUPLE1: '\x85',        // build 1-tuple from stack top
    TUPLE2: '\x86',        // build 2-tuple from two topmost stack items
    TUPLE3: '\x87',        // build 3-tuple from three topmost stack items
    NEWTRUE: '\x88',       // push True
    NEWFALSE: '\x89',      // push False
    LONG1: '\x8a',         // push long from < 256 bytes
    LONG4: '\x8b',         // push really big long
    // protocol 3
    BINBYTES: 'B',         // push bytes; counted binary string argument
    SHORT_BINBYTES: 'C',   // same, for fewer than 256 bytes
  };

  const cursor = new ArrayBufferCursor(pickle);

  // Current top of the stack (undefined when the stack is empty).
  const peek = () => this.stack[this.stack.length - 1];

  // Copy the alternating key/value items that sit above `mark` into `target`.
  const storePairs = (target, mark) => {
    for (let pos = mark + 1; pos < this.stack.length; pos += 2) {
      target[this.stack[pos]] = this.stack[pos + 1];
    }
  };

  // Decode `byteCount` bytes as a little-endian two's-complement integer.
  // The bytes must be consumed even if the value is not needed, otherwise the
  // following opcodes would be read from the middle of this payload.
  const readLong = (byteCount) => {
    let value = 0;
    let scale = 1;
    let lastByte = 0;
    for (let k = 0; k < byteCount; k += 1) {
      lastByte = cursor.next('Uint8');
      value += lastByte * scale;
      scale *= 256;
    }
    return lastByte >= 0x80 ? value - scale : value;
  };

  while (cursor.hasNext()) {
    const opcode = cursor.next('Char8');
    switch (opcode) {
      // protocol 2
      case OP.PROTO: {
        const proto = cursor.next('Uint8');
        if (proto !== 2 && proto !== 3) {
          throw new Error('Unhandled pickle protocol version: ' + proto);
        }
        break;
      }
      case OP.TUPLE1: {
        const a = this.stack.pop();
        this.stack.push([a]);
        break;
      }
      case OP.TUPLE2: {
        const b = this.stack.pop();
        const a = this.stack.pop();
        this.stack.push([a, b]);
        break;
      }
      case OP.TUPLE3: {
        const c = this.stack.pop();
        const b = this.stack.pop();
        const a = this.stack.pop();
        this.stack.push([a, b, c]);
        break;
      }
      case OP.NEWTRUE:
        this.stack.push(true);
        break;
      case OP.NEWFALSE:
        this.stack.push(false);
        break;
      case OP.LONG1:
        this.stack.push(readLong(cursor.next('Uint8')));
        break;
      case OP.LONG4:
        this.stack.push(readLong(cursor.next('Uint32')));
        break;
      case OP.POP:
        this.stack.pop();
        break;
      case OP.POP_MARK:
        this.stack = this.stack.slice(0, this.marker());
        break;
      case OP.DUP:
        this.stack.push(peek());
        break;
      case OP.EMPTY_DICT:
        this.stack.push({});
        break;
      case OP.EMPTY_LIST:
      case OP.EMPTY_TUPLE:
        this.stack.push([]);
        break;
      case OP.GET:
        this.stack.push(this.memo[cursor.readLine()]);
        break;
      case OP.BINGET:
        this.stack.push(this.memo[String(cursor.next('Uint8'))]);
        break;
      case OP.LONG_BINGET:
        this.stack.push(this.memo[String(cursor.next('Uint32'))]);
        break;
      case OP.PUT:
        this.memo[cursor.readLine()] = peek();
        break;
      case OP.BINPUT:
        this.memo[String(cursor.next('Uint8'))] = peek();
        break;
      case OP.LONG_BINPUT:
        this.memo[String(cursor.next('Uint32'))] = peek();
        break;
      case OP.GLOBAL: {
        const moduleName = cursor.readLine();
        const name = cursor.readLine();
        // If it is a class then `emulated` must have been populated first.
        const func = emulated[moduleName + '.' + name];
        if (func === undefined) {
          throw new Error("Cannot emulate global: " + moduleName + " " + name);
        }
        this.stack.push(func);
        break;
      }
      case OP.OBJ: {
        const Ctor = this.stack.pop();
        const obj = new Ctor();
        const mark = this.marker();
        storePairs(obj, mark);
        this.stack = this.stack.slice(0, mark);
        this.stack.push(obj);
        break;
      }
      case OP.BUILD: {
        const state = this.stack.pop();
        const obj = this.stack.pop();
        for (const key in state) {
          obj[key] = state[key];
        }
        this.stack.push(obj);
        break;
      }
      case OP.REDUCE: {
        const args = this.stack.pop();
        const func = peek();
        this.stack[this.stack.length - 1] = func(args);
        break;
      }
      case OP.INT: {
        const text = cursor.readLine();
        // Protocol 0 spells the booleans as the integers "01" and "00".
        if (text == '01') {
          this.stack.push(true);
        } else if (text == '00') {
          this.stack.push(false);
        } else {
          this.stack.push(parseInt(text, 10));
        }
        break;
      }
      case OP.BININT:
        // BININT is a signed 32-bit value; reading it unsigned mangles negatives.
        this.stack.push(cursor.next('Int32'));
        break;
      case OP.BININT1:
        this.stack.push(cursor.next('Uint8'));
        break;
      case OP.BININT2:
        this.stack.push(cursor.next('Uint16'));
        break;
      case OP.MARK:
        this.stack.push(this.mark);
        break;
      case OP.FLOAT:
        this.stack.push(parseFloat(cursor.readLine()));
        break;
      case OP.LONG:
        this.stack.push(parseInt(cursor.readLine(), 10));
        break;
      case OP.BINFLOAT:
        this.stack.push(cursor.next('Float64'));
        break;
      case OP.STRING: {
        const quoted = cursor.readLine();
        const quote = quoted[0];
        // The argument must start and end with the same quote character.
        const wellFormed = (quote === "'" || quote === '"')
          && quoted.length >= 2
          && quoted[quoted.length - 1] === quote;
        if (!wellFormed) {
          throw new Error("insecure string pickle");
        }
        this.stack.push(quoted.slice(1, -1));
        break;
      }
      case OP.UNICODE:
        this.stack.push(cursor.readLine());
        break;
      case OP.BINSTRING:
      case OP.BINBYTES:
        this.stack.push(cursor.toString(cursor.next('Uint32')));
        break;
      case OP.SHORT_BINSTRING:
      case OP.SHORT_BINBYTES:
        this.stack.push(cursor.toString(cursor.next('Uint8')));
        break;
      case OP.BINUNICODE: {
        // The payload is UTF-8; the cursor hands back one character per byte,
        // so turn those back into bytes and decode them properly.
        const raw = cursor.toString(cursor.next('Uint32'));
        const bytes = Uint8Array.from(raw, (ch) => ch.charCodeAt(0));
        this.stack.push(new TextDecoder('utf-8').decode(bytes));
        break;
      }
      case OP.APPEND: {
        const value = this.stack.pop();
        peek().push(value);
        break;
      }
      case OP.APPENDS: {
        const mark = this.marker();
        const list = this.stack[mark - 1];
        for (const item of this.stack.slice(mark + 1)) {
          list.push(item);
        }
        this.stack = this.stack.slice(0, mark);
        break;
      }
      case OP.SETITEM: {
        const value = this.stack.pop();
        const key = this.stack.pop();
        peek()[key] = value;
        break;
      }
      case OP.SETITEMS: {
        const mark = this.marker();
        storePairs(this.stack[mark - 1], mark);
        this.stack = this.stack.slice(0, mark);
        break;
      }
      case OP.LIST:
      case OP.TUPLE: {
        const mark = this.marker();
        const items = this.stack.slice(mark + 1);
        this.stack = this.stack.slice(0, mark);
        this.stack.push(items);
        break;
      }
      case OP.DICT: {
        const mark = this.marker();
        const obj = {};
        storePairs(obj, mark);
        this.stack = this.stack.slice(0, mark);
        this.stack.push(obj);
        break;
      }
      case OP.STOP:
        return this.stack.pop();
      case OP.NONE:
        this.stack.push(null);
        break;
      default:
        throw new Error(
          'Unhandled opcode ' + JSON.stringify(opcode)
          + ' (0x' + opcode.charCodeAt(0).toString(16) + ')'
        );
    }
  }
  return undefined;
};

/**
 * Find the position of the topmost mark sentinel on the stack.
 *
 * Scans downward from the top and stops at the first entry identical to
 * `this.mark`; the scan also stops at index 0, so 0 is returned when no mark
 * is found (and -1 when the stack is empty).
 *
 * @param {*} [parser] - unused; kept for interface compatibility.
 * @returns {number} index of the mark on the stack.
 */
Parser.prototype.marker = function (parser) {
  let k = this.stack.length - 1;
  while (k > 0 && this.stack[k] !== this.mark) {
    k -= 1;
  }
  return k;
};
T.
试试这个https://github.com/ewfian/pickleparser
在线演示:在线 Pickle 到 JSON 转换器
// Demo wiring: when a file is chosen, parse it as a pickle with the external
// `pickleparser` library and show the result as pretty-printed JSON.
const fileSelector = document.getElementById('file_selector');
const jsonResultPreviewer = document.getElementById('json_result_previewer');

fileSelector.addEventListener('change', function () {
  const file = fileSelector.files[0];
  if (!file) {
    // The selection can be empty (for example when the dialog is cancelled);
    // readAsArrayBuffer would throw on a missing file.
    return;
  }

  const reader = new FileReader();
  reader.onload = function () {
    try {
      const buffer = new Uint8Array(reader.result);
      const parser = new pickleparser.Parser(buffer);
      const obj = parser.load();
      const json = JSON.stringify(obj, null, 4);
      jsonResultPreviewer.innerText = json;
    } catch (err) {
      // Surface parse failures in the page instead of leaving stale output.
      console.error(err);
      jsonResultPreviewer.innerText = 'Failed to parse pickle: ' + String(err);
    }
  };
  reader.onerror = function () {
    console.error(reader.error);
    jsonResultPreviewer.innerText = 'Failed to read file: ' + String(reader.error);
  };
  reader.readAsArrayBuffer(file);
});