我目前正在尝试创建一系列表格。前两个表填充得很好,因为每个条目都有与创建表时所用的键相匹配的键。
第一个表:1 行,包含 3 列(object、has_more、data)
第二个表:少量行,11 列(object、id、type、updated_at、uri、name、description、size、download_uri、content_type、content_encoding)
第三个表:沿用前面的方法,我取出键并拼接成字符串,用于 “CREATE TABLE” 语句。这对前几个条目有效,但随后的某个 JSON 对象的键与之不同,导致报错:“表有 64 列,但提供了 60 个值”。我估计后面还会有更多条目出现这种情况,列/值的数量可能更多,也可能更少。
代码如下:
import requests
import urllib
import json
import sqlite3
from os import path, replace
# Question's original script: downloads Scryfall bulk-data JSON and loads it
# into SQLite tables by building SQL text from the JSON keys and values.
# NOTE(review): indentation was lost in this listing, so the exact extent of
# each `for` loop body below cannot be determined from the text alone.
BulkDATAurl = "https://api.scryfall.com/bulk-data" # Store URL path as (str)
# NOTE(review): only `import urllib` appears in the imports; `urllib.request`
# is a submodule and is presumably loaded as a side effect of another
# import -- confirm, or import it explicitly.
BulkDataJSON = json.load(urllib.request.urlopen(BulkDATAurl))
#OPEN DATABASE CONNECTION AND SET CURSOR
connection = sqlite3.connect("JSONTestDB")
cursor = connection.cursor()
#CREATE MAIN BULKDATA TABLE - this works but open to other suggestions
# Keys and values of the top-level JSON object are each rendered as a
# comma-separated list of double-quoted strings.
columns = ", ".join('"' + str(x) + '"' for x in BulkDataJSON.keys())
values = ", ".join('"' + str(x) + '"' for x in BulkDataJSON.values())
CreateTableCmd = "CREATE TABLE IF NOT EXISTS TestTable (" + columns + ")"
# The values are spliced directly into the SQL text instead of being bound as
# parameters, so any double quote inside a value would break the statement.
InsertTableCmd = "INSERT INTO %s (%s) VALUES (%s);" % ('TestTable', columns, values)
cursor.execute(CreateTableCmd)
cursor.execute(InsertTableCmd)
connection.commit()
#CREATE BULKDATA DATA TABLE - also works like previous
# The column list is taken from the first element of BulkDataJSON['data'] only.
columns = ", ".join('"' + str(x) + '"' for x in BulkDataJSON['data'][0].keys())
CreateTableCmd = "CREATE TABLE IF NOT EXISTS TestTableData (" + columns + ")"
cursor.execute(CreateTableCmd)
# `i` is a manual index into BulkDataJSON['data']; the loop variable `item`
# is not used by any of the statements below.
i=0
for item in BulkDataJSON['data']:
values = ", ".join('"' + str(x) + '"' for x in BulkDataJSON['data'][i].values())
i += 1
InsertTableCmd = "INSERT INTO %s (%s) VALUES (%s);" % ('TestTableData', columns, values)
cursor.execute(InsertTableCmd)
connection.commit()
#CREATE TABLE FROM EXTRACTED URL - this is where my problem happens, 3rd item has less keys, some of which dont even match
# Download the file referenced by the third entry of the bulk-data listing.
oracleURL = BulkDataJSON['data'][2]['download_uri']
oracleJSON = json.load(urllib.request.urlopen(oracleURL))
# `00000` is just the integer literal 0, so the table columns come from the
# keys of the first downloaded record only.
columns = ", ".join('"' + str(x) + '"' for x in oracleJSON[00000].keys())
CreateTableCmd = "CREATE TABLE IF NOT EXISTS TestTableOracle (" + columns + ")"
cursor.execute(CreateTableCmd)
i=0
# NOTE(review): this iterates over the first record itself (presumably a dict,
# i.e. over its keys), not over the list of records, so the loop runs once per
# key of record 0 while `oracleJSON[i]` walks the records by index.
for item in oracleJSON[00000]:
# Each record contributes its own values (double quotes replaced by '-'), but
# `columns` is still the key list of record 0; a record with a different
# number of keys therefore yields a column/value count mismatch.
values = ", ".join('"' + str(x).replace('"', '-') + '"' for x in oracleJSON[i].values())
i += 1
InsertTableCmd = "INSERT INTO %s (%s) VALUES (%s);" % ('TestTableOracle', columns, values)
cursor.execute(InsertTableCmd)
connection.commit()
# Final commit (a second one in this listing) and close of the connection.
connection.commit()
connection.close()
到这一步,我不太确定接下来该怎么做,也不知道是否有更高效的方法来实现我想做的事情,任何帮助我都将不胜感激。如果已有帖子讨论过这个问题,那么很抱歉,我可能没看到或者理解有误。
现在我终于用上了真正的终端,下面是对代码的重构,它可以正常运行。我尽量在必要的地方添加了注释。
总之,思路是:从 API 下载文件后,先遍历一遍数据以得到完整的列清单,据此创建表,然后用同一份列清单生成各行(如果某一列在原始行中不存在,则该列的值为 `None`;SQLite 默认无法处理的数据会被转回 JSON 字符串后存储)。
import json
import sqlite3
import requests
# Module-level SQLite connection to the database file "JSONTestDB.sqlite3";
# create_table_from_list() obtains its cursor from this object and commits on it.
connection = sqlite3.connect("JSONTestDB.sqlite3")
def jsonify_complex_value(val: object) -> object:
    """Return *val* in a form that can be bound as a SQLite parameter.

    Dicts and lists are serialized to a JSON string with ``json.dumps``;
    every other value (including ``None``) is returned unchanged.
    """
    if isinstance(val, (dict, list)):
        return json.dumps(val)
    return val
def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + str(name).replace('"', '""') + '"'


def create_table_from_list(table_name, list_url, *, timeout=60):
    """Download a JSON list of objects and load it into a SQLite table.

    The table gets one column per key seen in any downloaded object, in
    first-seen order. Keys missing from a given object are stored as NULL,
    and dict/list values are stored as JSON text.

    Args:
        table_name: Name of the table to create (if missing) and fill. It is
            quoted as a single identifier before being placed in the SQL.
        list_url: URL whose response body is a JSON array of objects.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot hang the call forever.

    Raises:
        requests.HTTPError: If the download returns an error status.
        TypeError: If the downloaded JSON document is not a list.
        sqlite3.Error: If creating or filling the table fails; the
            transaction is rolled back before the error propagates.
    """
    print(f"Downloading {table_name} from {list_url}")
    data_resp = requests.get(list_url, timeout=timeout)
    data_resp.raise_for_status()
    data = data_resp.json()
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not isinstance(data, list):
        raise TypeError(
            f"expected a JSON list from {list_url}, got {type(data).__name__}"
        )

    # Collect every column. A dict (rather than a set) keeps first-seen
    # order, so the generated schema is the same on every run.
    all_columns = {}
    for row in data:
        all_columns.update(dict.fromkeys(row.keys()))
    all_columns = list(all_columns)

    print(f"{table_name} has {len(all_columns)} columns and {len(data)} rows")
    if not all_columns:
        # No keys at all: "CREATE TABLE t ()" would be a syntax error.
        return

    # Names come from remote data and may be reserved words or contain
    # quotes, so every identifier is quoted and escaped.
    quoted_table = _quote_identifier(table_name)
    columns_for_sql = ", ".join(_quote_identifier(col) for col in all_columns)
    qmarks_for_sql = ", ".join("?" for _ in all_columns)

    create_table_sql = (
        f"CREATE TABLE IF NOT EXISTS {quoted_table} ({columns_for_sql})"
    )
    insert_sql = (
        f"INSERT INTO {quoted_table} ({columns_for_sql}) "
        f"VALUES ({qmarks_for_sql})"
    )

    # Lazily build one tuple per row: missing columns become None and
    # complex values are converted to JSON text.
    row_gen = (
        tuple(jsonify_complex_value(row.get(col)) for col in all_columns)
        for row in data
    )

    cursor = connection.cursor()
    try:
        # Do everything in a single transaction for speed.
        cursor.execute("BEGIN TRANSACTION")
        cursor.execute(create_table_sql)
        cursor.executemany(insert_sql, row_gen)
        connection.commit()
    except Exception:
        # Do not leave a half-filled table or an open transaction behind.
        connection.rollback()
        raise
    finally:
        cursor.close()
def main():
    """Fetch the Scryfall bulk-data index and load the selected files.

    Every entry of the index's ``"data"`` list whose ``"type"`` is
    ``oracle_cards`` or ``default_cards`` is passed to
    ``create_table_from_list`` using the type as table name; all other
    entries are skipped with a message.

    Raises:
        requests.HTTPError: If the index request returns an error status.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    bulk_data_resp = requests.get(
        "https://api.scryfall.com/bulk-data", timeout=60
    )
    bulk_data_resp.raise_for_status()
    for obj in bulk_data_resp.json()["data"]:
        table_name = obj["type"]
        url = obj["download_uri"]
        if table_name not in ("oracle_cards", "default_cards"):
            # Skip things we don't care about for this example
            print("Skipping", table_name)
            continue
        create_table_from_list(table_name, url)


if __name__ == "__main__":
    main()
验证数据是否存在:
$ sqlite3 JSONTestDB.sqlite3 'select name from oracle_cards order by random() limit 1;'
Vraska the Unseen