我用下面的代码把两个数据帧的列合并到同一个数据帧中:第一个数据帧包含预测文本,第二个包含真实值(ground truth)。
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive so the files below can be read and written.
drive.mount('/content/drive')
def load_jsonl(text_path) -> pd.DataFrame:
    """Load a JSON Lines file into a pandas DataFrame.

    Args:
        text_path: Path (or any other source accepted by ``pd.read_json``)
            of a file holding one JSON object per line.

    Returns:
        The DataFrame built by ``pd.read_json`` with ``lines=True``.
    """
    return pd.read_json(path_or_buf=text_path, lines=True)
import json

# Directory on the mounted Drive that holds the inputs and receives the output.
working_dir = "/content/drive/MyDrive/Class_B/"
df = load_jsonl(f'{working_dir}labels.jsonl')

# Read the ground-truth text, one entry per line.
# NOTE: readlines() keeps the trailing "\n" of every line, so each
# Ground_truth value written below still ends with a newline.
with open(f'{working_dir}3rd_col.txt', 'r', encoding='utf-8') as file1:
    Lines = file1.readlines()
col_3rd = pd.DataFrame(Lines, columns=['Ground_truth'])

# Put the ground-truth column next to the loaded columns. Rows are matched
# on the index, so a differing row count leaves missing cells as NaN.
result = pd.concat([df, col_3rd], axis=1)

# One dict per row, serialised as one JSON object per output line.
reddit = result.to_dict(orient="records")
print(type(reddit), len(reddit))
with open(f"{working_dir}Class_B.jsonl", "w", encoding="utf-8") as f:
    for line in reddit:
        # ensure_ascii=False writes non-ASCII characters as-is, not as \u escapes.
        f.write(json.dumps(line, ensure_ascii=False) + "\n")
输入文件(JSON Lines 格式,每行一个 JSON 对象):
labels.jsonl
{"image_name": "1.JPG", "text": "Flattery is words of kindness for a"}
{"image_name": "2.JPG", "text": "potential favor."}
存放第三列(真实值)内容的文本文件:
3rd_col.txt
Flattery is words of kindness for a
potential favor.
我得到的结果文件 Class_B.jsonl 如下:
{"image_name": "1.JPG", "text": "Flattery is words of kindness for a", "Ground_truth": "Flattery is words of kindness for a \n"}
{"image_name": "2.JPG", "text": "potential favor.", "Ground_truth": "potential favor. \n"}
预期结果:`Ground_truth` 的值末尾不应带有 `\n`:
{"image_name": "1.JPG", "text": "Flattery is words of kindness for a", "Ground_truth": "Flattery is words of kindness for a"}
{"image_name": "2.JPG", "text": "potential favor.", "Ground_truth": "potential favor."}
感谢 @dkruit 的回答。修正后的代码如下(用 `read().splitlines()` 代替 `readlines()`):
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive so the files below can be read and written.
drive.mount('/content/drive')
def load_jsonl(text_path) -> pd.DataFrame:
    """Load a JSON Lines file into a pandas DataFrame.

    Args:
        text_path: Path (or any other source accepted by ``pd.read_json``)
            of a file holding one JSON object per line.

    Returns:
        The DataFrame built by ``pd.read_json`` with ``lines=True``.
    """
    return pd.read_json(path_or_buf=text_path, lines=True)
import json

# Directory on the mounted Drive that holds the inputs and receives the output.
working_dir = "/content/drive/MyDrive/Class_B/"
df = load_jsonl(f'{working_dir}labels.jsonl')

# Read the ground-truth text, one entry per line.
# read().splitlines() drops the line terminators, so the Ground_truth values
# carry no trailing "\n" (unlike readlines()).
with open(f'{working_dir}3rd_col.txt', 'r', encoding='utf-8') as file1:
    Lines = file1.read().splitlines()
col_3rd = pd.DataFrame(Lines, columns=['Ground_truth'])

# Put the ground-truth column next to the loaded columns. Rows are matched
# on the index, so a differing row count leaves missing cells as NaN.
result = pd.concat([df, col_3rd], axis=1)

# One dict per row, serialised as one JSON object per output line.
reddit = result.to_dict(orient="records")
with open(f"{working_dir}Class_B.jsonl", "w", encoding="utf-8") as f:
    for line in reddit:
        # ensure_ascii=False writes non-ASCII characters as-is, not as \u escapes.
        f.write(json.dumps(line, ensure_ascii=False) + "\n")