我正在尝试将 multiqc_data.json 文件转换为 tsv 文件。具体来说,我只需要“report_general_stats_data”部分中的部分数据。其结构为:
{
"report_general_stats_data": [
{
"contigs": {
"# contigs (>= 0 bp)": 232.0,
"# contigs (>= 1000 bp)": 169.0,
"# contigs (>= 5000 bp)": 121.0,
"# contigs (>= 10000 bp)": 107.0,
"# contigs (>= 25000 bp)": 75.0,
"# contigs (>= 50000 bp)": 37.0,
"Total length (>= 0 bp)": 5594769.0,
"Total length (>= 1000 bp)": 5569601.0,
"Total length (>= 5000 bp)": 5452934.0,
"Total length (>= 10000 bp)": 5349401.0,
"Total length (>= 25000 bp)": 4801158.0,
"Total length (>= 50000 bp)": 3427439.0,
"# contigs": 187.0,
"Largest contig": 216595.0,
"Total length": 5582595.0,
"Reference length": 5472672.0,
"GC (%)": 57.1,
"Reference GC (%)": 57.37,
"N50": 63236.0,
"NG50": 64616.0,
"N90": 19080.0,
"NG90": 21621.0,
"auN": 81138.9,
"auNG": 82768.6,
"L50": 26.0,
"LG50": 25.0,
"L90": 86.0,
"LG90": 81.0,
"# N's per 100 kbp": 0.0
}
},
{
"24-240_L001_R1_001": {
"filtering_result_passed_filter_reads": 1106872.0,
"filtering_result_low_quality_reads": 32142.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.10114,
"after_filtering_total_reads": 1106872.0,
"after_filtering_total_bases": 296659710.0,
"after_filtering_q20_bases": 268093576.0,
"after_filtering_q30_bases": 233354655.0,
"after_filtering_q20_rate": 0.903707,
"after_filtering_q30_rate": 0.786607,
"after_filtering_read1_mean_length": 268.0,
"after_filtering_read2_mean_length": 267.0,
"after_filtering_gc_content": 0.565903,
"before_filtering_total_reads": 1139014.0,
"pct_surviving": 97.1780856073762,
"adapter_cutting_adapter_trimmed_reads": 60616.0,
"adapter_cutting_adapter_trimmed_bases": 1262446.0,
"pct_adapter": 5.321795869058677
},
"24-278_L001_R1_001": {
"filtering_result_passed_filter_reads": 2161572.0,
"filtering_result_low_quality_reads": 50870.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.20167800000000002,
"after_filtering_total_reads": 2161572.0,
"after_filtering_total_bases": 529325786.0,
"after_filtering_q20_bases": 491653949.0,
"after_filtering_q30_bases": 442959345.0,
"after_filtering_q20_rate": 0.928831,
"after_filtering_q30_rate": 0.836837,
"after_filtering_read1_mean_length": 244.0,
"after_filtering_read2_mean_length": 244.0,
"after_filtering_gc_content": 0.560734,
"before_filtering_total_reads": 2212442.0,
"pct_surviving": 97.70073068582137,
"adapter_cutting_adapter_trimmed_reads": 187672.0,
"adapter_cutting_adapter_trimmed_bases": 4596580.0,
"pct_adapter": 8.48257265049208
},
"24-288_L001_R1_001": {
"filtering_result_passed_filter_reads": 1531162.0,
"filtering_result_low_quality_reads": 51164.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.11451499999999999,
"after_filtering_total_reads": 1531162.0,
"after_filtering_total_bases": 409223893.0,
"after_filtering_q20_bases": 375027676.0,
"after_filtering_q30_bases": 333000899.0,
"after_filtering_q20_rate": 0.916436,
"after_filtering_q30_rate": 0.813738,
"after_filtering_read1_mean_length": 267.0,
"after_filtering_read2_mean_length": 267.0,
"after_filtering_gc_content": 0.562734,
"before_filtering_total_reads": 1582326.0,
"pct_surviving": 96.76653230750173,
"adapter_cutting_adapter_trimmed_reads": 97506.0,
"adapter_cutting_adapter_trimmed_bases": 2027444.0,
"pct_adapter": 6.162194136985678
},
"24-275_L001_R1_001": {
"filtering_result_passed_filter_reads": 1673478.0,
"filtering_result_low_quality_reads": 40514.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.10735199999999999,
"after_filtering_total_reads": 1673478.0,
"after_filtering_total_bases": 439326257.0,
"after_filtering_q20_bases": 401596781.0,
"after_filtering_q30_bases": 354025417.0,
"after_filtering_q20_rate": 0.91412,
"after_filtering_q30_rate": 0.805837,
"after_filtering_read1_mean_length": 262.0,
"after_filtering_read2_mean_length": 262.0,
"after_filtering_gc_content": 0.560707,
"before_filtering_total_reads": 1713992.0,
"pct_surviving": 97.6362783490238,
"adapter_cutting_adapter_trimmed_reads": 149906.0,
"adapter_cutting_adapter_trimmed_bases": 3353406.0,
"pct_adapter": 8.746015150595802
},
"24-269_L001_R1_001": {
"filtering_result_passed_filter_reads": 2302934.0,
"filtering_result_low_quality_reads": 51540.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.194948,
"after_filtering_total_reads": 2302934.0,
"after_filtering_total_bases": 560492864.0,
"after_filtering_q20_bases": 521238294.0,
"after_filtering_q30_bases": 469860972.0,
"after_filtering_q20_rate": 0.929964,
"after_filtering_q30_rate": 0.8383,
"after_filtering_read1_mean_length": 243.0,
"after_filtering_read2_mean_length": 243.0,
"after_filtering_gc_content": 0.565888,
"before_filtering_total_reads": 2354474.0,
"pct_surviving": 97.81097603965895,
"adapter_cutting_adapter_trimmed_reads": 203026.0,
"adapter_cutting_adapter_trimmed_bases": 5012952.0,
"pct_adapter": 8.622987554757454
},
"24-276_L001_R1_001": {
"filtering_result_passed_filter_reads": 1417706.0,
"filtering_result_low_quality_reads": 57658.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.12457900000000001,
"after_filtering_total_reads": 1417706.0,
"after_filtering_total_bases": 357804610.0,
"after_filtering_q20_bases": 328089731.0,
"after_filtering_q30_bases": 292560788.0,
"after_filtering_q20_rate": 0.916952,
"after_filtering_q30_rate": 0.817655,
"after_filtering_read1_mean_length": 252.0,
"after_filtering_read2_mean_length": 252.0,
"after_filtering_gc_content": 0.559441,
"before_filtering_total_reads": 1475364.0,
"pct_surviving": 96.0919474787239,
"adapter_cutting_adapter_trimmed_reads": 133228.0,
"adapter_cutting_adapter_trimmed_bases": 3293571.0,
"pct_adapter": 9.03017831531744
},
"24-246_L001_R1_001": {
"filtering_result_passed_filter_reads": 666866.0,
"filtering_result_low_quality_reads": 12134.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.0768778,
"after_filtering_total_reads": 666866.0,
"after_filtering_total_bases": 176201195.0,
"after_filtering_q20_bases": 158672752.0,
"after_filtering_q30_bases": 136656453.0,
"after_filtering_q20_rate": 0.90052,
"after_filtering_q30_rate": 0.775571,
"after_filtering_read1_mean_length": 264.0,
"after_filtering_read2_mean_length": 263.0,
"after_filtering_gc_content": 0.560996,
"before_filtering_total_reads": 679000.0,
"pct_surviving": 98.21296023564065,
"adapter_cutting_adapter_trimmed_reads": 41992.0,
"adapter_cutting_adapter_trimmed_bases": 873172.0,
"pct_adapter": 6.18438880706922
},
"24-287_L001_R1_001": {
"filtering_result_passed_filter_reads": 1761992.0,
"filtering_result_low_quality_reads": 57788.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.140786,
"after_filtering_total_reads": 1761992.0,
"after_filtering_total_bases": 452625879.0,
"after_filtering_q20_bases": 414709658.0,
"after_filtering_q30_bases": 368660242.0,
"after_filtering_q20_rate": 0.916231,
"after_filtering_q30_rate": 0.814492,
"after_filtering_read1_mean_length": 256.0,
"after_filtering_read2_mean_length": 256.0,
"after_filtering_gc_content": 0.562427,
"before_filtering_total_reads": 1819780.0,
"pct_surviving": 96.82445130730089,
"adapter_cutting_adapter_trimmed_reads": 143030.0,
"adapter_cutting_adapter_trimmed_bases": 3336030.0,
"pct_adapter": 7.859741287408369
},
"24-270_L001_R1_001": {
"filtering_result_passed_filter_reads": 2304254.0,
"filtering_result_low_quality_reads": 60436.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.145389,
"after_filtering_total_reads": 2304254.0,
"after_filtering_total_bases": 595769254.0,
"after_filtering_q20_bases": 548648672.0,
"after_filtering_q30_bases": 489253928.0,
"after_filtering_q20_rate": 0.920908,
"after_filtering_q30_rate": 0.821214,
"after_filtering_read1_mean_length": 258.0,
"after_filtering_read2_mean_length": 258.0,
"after_filtering_gc_content": 0.561259,
"before_filtering_total_reads": 2364690.0,
"pct_surviving": 97.44423159061019,
"adapter_cutting_adapter_trimmed_reads": 186984.0,
"adapter_cutting_adapter_trimmed_bases": 4124802.0,
"pct_adapter": 7.907336691067328
},
"24-279_L001_R1_001": {
"filtering_result_passed_filter_reads": 1851444.0,
"filtering_result_low_quality_reads": 55646.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.158147,
"after_filtering_total_reads": 1851444.0,
"after_filtering_total_bases": 467943785.0,
"after_filtering_q20_bases": 432758485.0,
"after_filtering_q30_bases": 388427451.0,
"after_filtering_q20_rate": 0.924809,
"after_filtering_q30_rate": 0.830073,
"after_filtering_read1_mean_length": 252.0,
"after_filtering_read2_mean_length": 252.0,
"after_filtering_gc_content": 0.564391,
"before_filtering_total_reads": 1907090.0,
"pct_surviving": 97.08215134052405,
"adapter_cutting_adapter_trimmed_reads": 146844.0,
"adapter_cutting_adapter_trimmed_bases": 3434243.0,
"pct_adapter": 7.699898798693297
},
"24-295_L001_R1_001": {
"filtering_result_passed_filter_reads": 1532672.0,
"filtering_result_low_quality_reads": 58592.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.127194,
"after_filtering_total_reads": 1532672.0,
"after_filtering_total_bases": 409691227.0,
"after_filtering_q20_bases": 374266368.0,
"after_filtering_q30_bases": 331209675.0,
"after_filtering_q20_rate": 0.913533,
"after_filtering_q30_rate": 0.808437,
"after_filtering_read1_mean_length": 267.0,
"after_filtering_read2_mean_length": 267.0,
"after_filtering_gc_content": 0.56121,
"before_filtering_total_reads": 1591264.0,
"pct_surviving": 96.31789571057978,
"adapter_cutting_adapter_trimmed_reads": 90732.0,
"adapter_cutting_adapter_trimmed_bases": 1903577.0,
"pct_adapter": 5.701882277233696
},
"24-277_L001_R1_001": {
"filtering_result_passed_filter_reads": 1498306.0,
"filtering_result_low_quality_reads": 33490.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.1132,
"after_filtering_total_reads": 1498306.0,
"after_filtering_total_bases": 389095200.0,
"after_filtering_q20_bases": 355870530.0,
"after_filtering_q30_bases": 314058539.0,
"after_filtering_q20_rate": 0.91461,
"after_filtering_q30_rate": 0.807151,
"after_filtering_read1_mean_length": 259.0,
"after_filtering_read2_mean_length": 259.0,
"after_filtering_gc_content": 0.562061,
"before_filtering_total_reads": 1531796.0,
"pct_surviving": 97.8136775393068,
"adapter_cutting_adapter_trimmed_reads": 106956.0,
"adapter_cutting_adapter_trimmed_bases": 2318318.0,
"pct_adapter": 6.982391911194441
},
"24-274_L001_R1_001": {
"filtering_result_passed_filter_reads": 1710930.0,
"filtering_result_low_quality_reads": 73040.0,
"filtering_result_too_many_N_reads": 0.0,
"filtering_result_too_short_reads": 0.0,
"filtering_result_too_long_reads": 0.0,
"pct_duplication": 0.116482,
"after_filtering_total_reads": 1710930.0,
"after_filtering_total_bases": 449663434.0,
"after_filtering_q20_bases": 407856941.0,
"after_filtering_q30_bases": 359065093.0,
"after_filtering_q20_rate": 0.907027,
"after_filtering_q30_rate": 0.79852,
"after_filtering_read1_mean_length": 262.0,
"after_filtering_read2_mean_length": 262.0,
"after_filtering_gc_content": 0.562058,
"before_filtering_total_reads": 1783970.0,
"pct_surviving": 95.90576074709777,
"adapter_cutting_adapter_trimmed_reads": 144878.0,
"adapter_cutting_adapter_trimmed_bases": 3253300.0,
"pct_adapter": 8.12110069115512
}
},
{
"R1.cor": {
"s_name": "R1.cor",
"totalpairs": 855465,
"discardpairs": 0,
"percdiscard": 0,
"combopairs": 796467,
"inniepairs": 0,
"outiepairs": 0,
"uncombopairs": 58998,
"perccombo": 93.1
}
}
],
"config_creation_date": "2024-11-28, 17:31 UTC",
"config_git_hash": null,
"config_intro_text": null,
"config_report_comment": null,
"config_report_header_info": null,
"config_script_path": "/home/ubuntu/miniconda3/envs/multiqc",
"config_short_version": "1.23",
"config_subtitle": null,
"config_title": null,
"config_version": "1.23",
"config_output_dir": "/home/ubuntu"
}
我希望使用 jq 从
"report_general_stats_data"
部分提取某些字段。我的预期输出类似于:
"Sample"\t"filtering_result_passed_filter_reads"\t"filtering_result_low_quality_reads"...\t"pct_adapter"
"24-240_L001_R1_001"\t"1106872.0"\t"32142.0"...\t"5.321795869058677"
其余样本依此类推。到目前为止,我已经编写了以下脚本,但没有成功:
jq -r '.report_general_stats_data[1] | @tsv' multiqc_data.json > multiqc_data.tsv
非常感谢任何帮助。
干杯,
阿尔贝托
这是我的尝试;思路是先从第一个样本中取出键(即字段名)作为表头,再用这些键从每个样本中提取对应的数据:
# Turn the per-sample objects under .report_general_stats_data[1] into TSV:
# one header row followed by one row per sample.
(
  .report_general_stats_data[1]
  | to_entries
  # Fold the sample name (the entry's key) into that sample's stats object.
  | map({"Sample": .key} * .value)
) as $records
| (
  # Column names are taken from the first record. `keys` returns them
  # sorted, so the columns are emitted in sorted order rather than in the
  # order they appear in the input.
  $records[0]
  | keys
) as $header
| (
  $header,
  (
    $records[]
    | . as $fields
    # Must be $header: the previous `$head` was never bound, which makes jq
    # reject the whole program before any output is produced.
    | $header
    # Look up every column in the current record so each row lines up with
    # the header row.
    | map($fields[.])
  )
)
| @tsv
Sample adapter_cutting_adapter_trimmed_bases adapter_cutting_adapter_trimmed_reads after_filtering_gc_content after_filtering_q20_bases after_filtering_q20_rate after_filtering_q30_bases after_filtering_q30_rate after_filtering_read1_mean_length after_filtering_read2_mean_length after_filtering_total_bases after_filtering_total_reads before_filtering_total_reads filtering_result_low_quality_reads filtering_result_passed_filter_reads filtering_result_too_long_reads filtering_result_too_many_N_reads filtering_result_too_short_reads pct_adapter pct_duplication pct_surviving
24-240_L001_R1_001 1262446.0 60616.0 0.565903 268093576.0 0.903707 233354655.0 0.786607 268.0 267.0 296659710.0 1106872.0 1139014.0 32142.0 1106872.0 0.0 0.0 0.0 5.321795869058677 0.10114 97.1780856073762
24-278_L001_R1_001 4596580.0 187672.0 0.560734 491653949.0 0.928831 442959345.0 0.836837 244.0 244.0 529325786.0 2161572.0 2212442.0 50870.0 2161572.0 0.0 0.0 0.0 8.48257265049208 0.20167800000000002 97.70073068582137
24-288_L001_R1_001 2027444.0 97506.0 0.562734 375027676.0 0.916436 333000899.0 0.813738 267.0 267.0 409223893.0 1531162.0 1582326.0 51164.0 1531162.0 0.0 0.0 0.0 6.162194136985678 0.11451499999999999 96.76653230750173
24-275_L001_R1_001 3353406.0 149906.0 0.560707 401596781.0 0.91412 354025417.0 0.805837 262.0 262.0 439326257.0 1673478.0 1713992.0 40514.0 1673478.0 0.0 0.0 0.0 8.746015150595802 0.10735199999999999 97.6362783490238
24-269_L001_R1_001 5012952.0 203026.0 0.565888 521238294.0 0.929964 469860972.0 0.8383 243.0 243.0 560492864.0 2302934.0 2354474.0 51540.0 2302934.0 0.0 0.0 0.0 8.622987554757454 0.194948 97.81097603965895
24-276_L001_R1_001 3293571.0 133228.0 0.559441 328089731.0 0.916952 292560788.0 0.817655 252.0 252.0 357804610.0 1417706.0 1475364.0 57658.0 1417706.0 0.0 0.0 0.0 9.03017831531744 0.12457900000000001 96.0919474787239
24-246_L001_R1_001 873172.0 41992.0 0.560996 158672752.0 0.90052 136656453.0 0.775571 264.0 263.0 176201195.0 666866.0 679000.0 12134.0 666866.0 0.0 0.0 0.0 6.18438880706922 0.0768778 98.21296023564065
24-287_L001_R1_001 3336030.0 143030.0 0.562427 414709658.0 0.916231 368660242.0 0.814492 256.0 256.0 452625879.0 1761992.0 1819780.0 57788.0 1761992.0 0.0 0.0 0.0 7.859741287408369 0.140786 96.82445130730089
24-270_L001_R1_001 4124802.0 186984.0 0.561259 548648672.0 0.920908 489253928.0 0.821214 258.0 258.0 595769254.0 2304254.0 2364690.0 60436.0 2304254.0 0.0 0.0 0.0 7.907336691067328 0.145389 97.44423159061019
24-279_L001_R1_001 3434243.0 146844.0 0.564391 432758485.0 0.924809 388427451.0 0.830073 252.0 252.0 467943785.0 1851444.0 1907090.0 55646.0 1851444.0 0.0 0.0 0.0 7.699898798693297 0.158147 97.08215134052405
24-295_L001_R1_001 1903577.0 90732.0 0.56121 374266368.0 0.913533 331209675.0 0.808437 267.0 267.0 409691227.0 1532672.0 1591264.0 58592.0 1532672.0 0.0 0.0 0.0 5.701882277233696 0.127194 96.31789571057978
24-277_L001_R1_001 2318318.0 106956.0 0.562061 355870530.0 0.91461 314058539.0 0.807151 259.0 259.0 389095200.0 1498306.0 1531796.0 33490.0 1498306.0 0.0 0.0 0.0 6.982391911194441 0.1132 97.8136775393068
24-274_L001_R1_001 3253300.0 144878.0 0.562058 407856941.0 0.907027 359065093.0 0.79852 262.0 262.0 449663434.0 1710930.0 1783970.0 73040.0 1710930.0 0.0 0.0 0.0 8.12110069115512 0.116482 95.90576074709777