我有一个包含以下信息的JSON文件:
{
"type" : "record",
"name" : "warranty",
"doc" : "Schema generated by Kite",
"fields" : [ {
"name" : "id",
"type" : "long",
"doc" : "Type inferred from '1'"
}, {
"name" : "train_id",
"type" : "long",
"doc" : "Type inferred from '21691'"
}, {
"name" : "siemens_nr",
"type" : "string",
"doc" : "Type inferred from 'Loco-001'"
}, {
"name" : "uic_nr",
"type" : "long",
"doc" : "Type inferred from '193901'"
}, {
"name" : "Configuration",
"type" : "string",
"doc" : "Type inferred from 'ZP28'"
}, {
"name" : "Warranty_Status",
"type" : "string",
"doc" : "Type inferred from 'Out_of_Warranty'"
}, {
"name" : "Warranty_Data_Type",
"type" : "string",
"doc" : "Type inferred from 'Real_based_on_preliminary_acceptance_date'"
}, {
"name" : "of_progression",
"type" : "long",
"doc" : "Type inferred from '100'"
}, {
"name" : "Delivery_Date",
"type" : "string",
"doc" : "Type inferred from '18/12/2009'"
}, {
"name" : "Warranty_on_Delivery_Date",
"type" : "string",
"doc" : "Type inferred from '18/12/2013'"
}, {
"name" : "Customer_Status",
"type" : "string",
"doc" : "Type inferred from 'homologation'"
}, {
"name" : "Commissioning_Date",
"type" : "string",
"doc" : "Type inferred from '6/10/2010'"
}, {
"name" : "Preliminary_acceptance_date",
"type" : "string",
"doc" : "Type inferred from '6/01/2011'"
}, {
"name" : "Warranty_Start_Date",
"type" : "string",
"doc" : "Type inferred from '6/01/2011'"
}, {
"name" : "Warranty_End_Date",
"type" : "string",
"doc" : "Type inferred from '6/01/2013'"
}, {
"name" : "Effective_End_Warranty_Date",
"type" : [ "null", "string" ],
"doc" : "Type inferred from 'null'",
"default" : null
}, {
"name" : "Level_2_in_function",
"type" : "string",
"doc" : "Type inferred from '17/07/2015'"
}, {
"name" : "Baseline",
"type" : "string",
"doc" : "Type inferred from '2.10.23.4'"
}, {
"name" : "RELN_revision",
"type" : "string",
"doc" : "Type inferred from '0434-26.3'"
}, {
"name" : "TC_report",
"type" : "string",
"doc" : "Type inferred from 'A480140'"
}, {
"name" : "Last_version_Date",
"type" : "string",
"doc" : "Type inferred from 'A-23/09/2015'"
}, {
"name" : "ETCS_ID_NID_Engine",
"type" : [ "null", "long" ],
"doc" : "Type inferred from '13001'",
"default" : null
}, {
"name" : "Item_Type",
"type" : "string",
"doc" : "Type inferred from 'Item'"
}, {
"name" : "Path",
"type" : "string",
"doc" : "Type inferred from 'sites/TrWMTISnerc_Community/Lists/X4Trains'"
} ]
}
我想根据fields数组中的信息生成一条用于建表的SQL语句。为此,我写了下面这段Python代码:
import json
from pprint import pprint
import string
# Avro primitive type -> SQL column type; types not listed are emitted unchanged.
AVRO_TO_SQL = {"long": "float", "string": "varchar(255)"}


def build_create_table(schema):
    """Return a ``create table`` statement for a parsed Avro record schema.

    ``schema`` is the dict loaded from the schema JSON: ``schema["name"]``
    is used as the table name and each entry of ``schema["fields"]``
    becomes one column.  A union type (a JSON list such as
    ``["null", "string"]``) is reduced to its last non-null member.

    Raises ``KeyError`` if ``name``, ``fields`` or a field's ``name`` /
    ``type`` key is missing.
    """
    columns = []
    for field in schema["fields"]:
        avro_type = field["type"]
        if isinstance(avro_type, list):
            # Skip the "null" marker wherever it sits in the union, so that
            # ["string", "null"] does not yield a column of type "null".
            non_null = [member for member in avro_type if member != "null"]
            avro_type = non_null[-1] if non_null else "null"
        # Map the type itself instead of search-and-replacing the finished
        # statement: a global replace would also rewrite table or column
        # names that merely contain "long" or "string" (e.g. "longitude").
        type_name = str(avro_type)
        sql_type = AVRO_TO_SQL.get(type_name, type_name)
        columns.append("{} {}".format(field["name"], sql_type))
    # The " ," separator and the space before ")" reproduce the layout the
    # script has always printed.
    # NOTE(review): the statement wording is kept as originally written;
    # MySQL, PostgreSQL and SQLite spell the clause
    # "CREATE TABLE IF NOT EXISTS <name> (...)", so adjust the template
    # before executing the statement against a database.
    return "create table {} if not exist({} )".format(
        schema["name"], " ,".join(columns)
    )


if __name__ == "__main__":
    with open('/data/my-data/archive/in/test_warranty_data.csv.txt.avro.txt', 'r') as f:
        data = json.load(f)
    sentence = build_create_table(data)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(sentence)
它返回这个结果:
create table warranty if not exist(id float ,train_id float ,siemens_nr varchar(255) ,uic_nr float ,Configuration varchar(255) ,Warranty_Status varchar(255) ,Warranty_Data_Type varchar(255) ,of_progression float ,Delivery_Date varchar(255) ,Warranty_on_Delivery_Date varchar(255) ,Customer_Status varchar(255) ,Commissioning_Date varchar(255) ,Preliminary_acceptance_date varchar(255) ,Warranty_Start_Date varchar(255) ,Warranty_End_Date varchar(255) ,Effective_End_Warranty_Date varchar(255) ,Level_2_in_function varchar(255) ,Baseline varchar(255) ,RELN_revision varchar(255) ,TC_report varchar(255) ,Last_version_Date varchar(255) ,ETCS_ID_NID_Engine float ,Item_Type varchar(255) ,Path varchar(255) )
代码可以正常运行,但我想改进它,让它更简洁优雅。你有什么建议吗?谢谢
你可以替换这个部分:
# Loop-based construction of the statement: one "<name> <type> ," chunk is
# appended per schema field.
fields = data["fields"]
# Statement prefix carrying the table name taken from the schema.
sentence="create table "+data["name"]+" if not exist("
for field in fields:
# A list-valued type is a union; its last member is used as the column type.
if isinstance(field["type"],list):
sentence += str(field["name"])+" "+ str(field["type"][-1])+" ,"
else:
sentence += str(field["name"])+" "+ str(field["type"])+" ,"
# Drop the final character (the trailing comma) and close the parenthesis.
sentence=sentence[0:-1]+")"
改成更符合Python风格的写法:
# Build the whole statement in one expression: every field contributes
# "<name> <type>", where a list-valued (union) type is represented by its
# last member, and the pieces are joined with ", ".
sentence = "create table {name} if not exist({fields})".format(
    name=data["name"],
    fields=", ".join(
        col["name"] + " " + (
            col["type"][-1] if isinstance(col["type"], list) else col["type"]
        )
        for col in data["fields"]
    ),
)