我是 Zig 语言的新手,正在尝试用它对文件执行一些简单的操作。
我有一个函数,其目的是获取文件的路径并返回已排序文件的路径。
传入该函数的文件已经事先清除了特殊字符,并且每行只有一个单词。
运行下面的代码时,ArrayList 最终总是被最后一个单词的片段填满。
const std = @import("std");
const GPA = std.heap.GeneralPurposeAllocator;
const log = std.log;
const File = std.fs.File;
/// Opens the file at `file_path` (relative to the current working directory),
/// reads it line by line into `buffer`, and logs every line together with the
/// list of lines collected so far.
///
/// `buffer` is the scratch space for a single line; a line that does not fit
/// makes the read fail with `error.StreamTooLong`.
///
/// Returns `file_path` unchanged; the actual sorting step is omitted from
/// this excerpt. Allocation, path-encoding, open, and read errors propagate.
pub fn sortFile(file_path: []u8, buffer: []u8) ![]u8 {
    // creating general purpose allocator
    var general_purpose_allocator = GPA(.{}){};
    // Tear the allocator down on every exit path; the discarded result is its leak report.
    defer _ = general_purpose_allocator.deinit();
    const gpa = general_purpose_allocator.allocator();

    // Getting the path fitting base on OS
    const encoded_path_buffer = try gpa.alloc(u8, file_path.len);
    // Free the allocation itself (not the returned slice, which may be
    // `file_path`), and only after the path has been used to open the file.
    defer gpa.free(encoded_path_buffer);
    const encoded_file_sub_path = try encodePathForOs(file_path, encoded_path_buffer);

    // Creating reader
    const file_read: File = try std.fs.cwd().openFile(encoded_file_sub_path, .{});
    defer file_read.close();
    const file_reader = file_read.reader();

    // ArrayList for the words
    var lines = std.ArrayList([]u8).init(gpa);
    // Registered right after init so the list is released on error returns too.
    defer lines.deinit();

    // Looping line by line (word per line) and inserting
    while (try file_reader.readUntilDelimiterOrEof(buffer, '\n')) |line| {
        log.warn("line: {s}\n", .{line});
        // NOTE: `line` is a slice into `buffer`, and every read overwrites
        // `buffer`, so all entries stored here alias the same memory. This is
        // the behavior the surrounding question is about; store a copy
        // (e.g. `try gpa.dupe(u8, line)`) if the entries must stay distinct.
        try lines.append(line);
        log.warn("lines: {s}\n", .{lines.items});
    }

    const items = lines.items;
    log.warn("items: {s}\n", .{items});

    // rest of function non relevant - for it to be reproducible:
    return file_path;
}
工具函数 — 按操作系统对路径进行编码:
const builtin = @import("builtin");
const os_tag = builtin.os.tag;
const unicode = std.unicode;
const std = @import("std");
/// Returns the form of `path` to hand to the file-system API of the target OS.
///
/// On Windows, `path` is decoded as UTF-8 one code point at a time and
/// re-encoded as WTF-8 into `encoded_path_buffer`; the returned slice is the
/// written prefix of that buffer. On every other OS, `path` itself is
/// returned and `encoded_path_buffer` is left untouched.
///
/// Errors (Windows only): the UTF-8 decoding errors of `std.unicode`,
/// `error.InvalidUtf8` when the last sequence is truncated, and
/// `error.NoSpaceLeft` when `encoded_path_buffer` is too small.
fn encodePathForOs(path: []u8, encoded_path_buffer: []u8) ![]u8 {
    if (os_tag == .windows) {
        var read_index: usize = 0;
        var write_index: usize = 0;
        while (read_index < path.len) {
            // Decode a whole sequence: a multi-byte code point must not be
            // split into individual bytes, or it is re-encoded as garbage.
            const sequence_len = try unicode.utf8ByteSequenceLength(path[read_index]);
            if (read_index + sequence_len > path.len) return error.InvalidUtf8;
            const codepoint = try unicode.utf8Decode(path[read_index..][0..sequence_len]);

            // The encoded form of a decoded code point has the same length
            // as its source sequence, so this check guards the write below.
            if (write_index + sequence_len > encoded_path_buffer.len) return error.NoSpaceLeft;
            write_index += try unicode.wtf8Encode(codepoint, encoded_path_buffer[write_index..]);
            read_index += sequence_len;
        }
        return encoded_path_buffer[0..write_index];
    } else {
        return path;
    }
}
输入文件:
hello
this
is
a
test
this
is
a
test
this
is
a
test
this
is
a
test
追加前每行的日志,以及每次迭代时ArrayList的状态:
line: hello
lines: { hello }
line: this
lines: { this
, this }
line: is
lines: { is
s
, is
s, is }
line: a
lines: { a
s
, a
s, a
, a }
line: test
lines: { test
, test, te, t, test }
line: this
lines: { this
, this, th, t, this, this }
line: is
lines: { is
s
, is
s, is, i, is
s, is
s, is }
line: a
lines: { a
s
, a
s, a
, a, a
s, a
s, a
, a }
line: test
lines: { test
, test, te, t, test, test, te, t, test }
line: this
lines: { this
, this, th, t, this, this, th, t, this, this }
line: is
lines: { is
s
, is
s, is, i, is
s, is
s, is, i, is
s, is
s, is }
line: a
lines: { a
s
, a
s, a
, a, a
s, a
s, a
, a, a
s, a
s, a
, a }
line: test
lines: { test
, test, te, t, test, test, te, t, test, test, te, t, test }
line: this
lines: { this
, this, th, t, this, this, th, t, this, this, th, t, this, this }
line: is
lines: { is
s
, is
s, is, i, is
s, is
s, is, i, is
s, is
s, is, i, is
s, is
s, is }
line: a
lines: { a
s
, a
s, a
, a, a
s, a
s, a
, a, a
s, a
s, a
, a, a
s, a
s, a
, a }
line: test
lines: { test
, test, te, t, test, test, te, t, test, test, te, t, test, test, te, t, test }
最后的ArrayList:
items: { test , test, te, t, test, test, te, t, test, test, te, t, test, test, te, t, test }
尝试了很多技巧。
我怀疑存入列表的单词是指针而不是值,所以尝试过克隆。
尝试过在追加之前先增大 ArrayList 的容量。
尝试过按索引使用 insert 而不是 append,也尝试过把每个单词单独做成一个切片再插入。当然,我也搜索过类似的问题,但关于 Zig 的资料并不多。
我很希望能解决这个问题,更想弄明白这里到底发生了什么。
谢谢!
显然,问题出在没有复制内存上:`line` 每次都是指向同一个缓冲区的切片,而这个缓冲区在每次读取时都会被覆盖,因此列表中的条目全都指向同一块内存,其内容会随着循环不断变化。
为了解决这个问题,我使用了:
while (try file_reader.readUntilDelimiterOrEof(buffer, '\n')) |line| {
    // `line` is only a view into `buffer`, which the next read overwrites,
    // so store an owned copy of the bytes instead of the view itself.
    const new_line: []u8 = try gpa.dupe(u8, line);
    // If the append fails, the copy never reaches the list; free it here.
    errdefer gpa.free(new_line);
    try lines.append(new_line);
}
将行复制到另一个内存缓冲区中。