使用 jq 将部分 multiqc_data JSON 转换为 TSV

问题描述 投票:0回答:1

我正在尝试将 multiqc_data.json 文件转换为 tsv 文件。具体来说,我只需要“report_general_stats_data”部分中的部分数据。其结构为:

{
       "report_general_stats_data": [
        {
            "contigs": {
                "# contigs (>= 0 bp)": 232.0,
                "# contigs (>= 1000 bp)": 169.0,
                "# contigs (>= 5000 bp)": 121.0,
                "# contigs (>= 10000 bp)": 107.0,
                "# contigs (>= 25000 bp)": 75.0,
                "# contigs (>= 50000 bp)": 37.0,
                "Total length (>= 0 bp)": 5594769.0,
                "Total length (>= 1000 bp)": 5569601.0,
                "Total length (>= 5000 bp)": 5452934.0,
                "Total length (>= 10000 bp)": 5349401.0,
                "Total length (>= 25000 bp)": 4801158.0,
                "Total length (>= 50000 bp)": 3427439.0,
                "# contigs": 187.0,
                "Largest contig": 216595.0,
                "Total length": 5582595.0,
                "Reference length": 5472672.0,
                "GC (%)": 57.1,
                "Reference GC (%)": 57.37,
                "N50": 63236.0,
                "NG50": 64616.0,
                "N90": 19080.0,
                "NG90": 21621.0,
                "auN": 81138.9,
                "auNG": 82768.6,
                "L50": 26.0,
                "LG50": 25.0,
                "L90": 86.0,
                "LG90": 81.0,
                "# N's per 100 kbp": 0.0
            }
        },
        {
            "24-240_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1106872.0,
                "filtering_result_low_quality_reads": 32142.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.10114,
                "after_filtering_total_reads": 1106872.0,
                "after_filtering_total_bases": 296659710.0,
                "after_filtering_q20_bases": 268093576.0,
                "after_filtering_q30_bases": 233354655.0,
                "after_filtering_q20_rate": 0.903707,
                "after_filtering_q30_rate": 0.786607,
                "after_filtering_read1_mean_length": 268.0,
                "after_filtering_read2_mean_length": 267.0,
                "after_filtering_gc_content": 0.565903,
                "before_filtering_total_reads": 1139014.0,
                "pct_surviving": 97.1780856073762,
                "adapter_cutting_adapter_trimmed_reads": 60616.0,
                "adapter_cutting_adapter_trimmed_bases": 1262446.0,
                "pct_adapter": 5.321795869058677
            },
            "24-278_L001_R1_001": {
                "filtering_result_passed_filter_reads": 2161572.0,
                "filtering_result_low_quality_reads": 50870.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.20167800000000002,
                "after_filtering_total_reads": 2161572.0,
                "after_filtering_total_bases": 529325786.0,
                "after_filtering_q20_bases": 491653949.0,
                "after_filtering_q30_bases": 442959345.0,
                "after_filtering_q20_rate": 0.928831,
                "after_filtering_q30_rate": 0.836837,
                "after_filtering_read1_mean_length": 244.0,
                "after_filtering_read2_mean_length": 244.0,
                "after_filtering_gc_content": 0.560734,
                "before_filtering_total_reads": 2212442.0,
                "pct_surviving": 97.70073068582137,
                "adapter_cutting_adapter_trimmed_reads": 187672.0,
                "adapter_cutting_adapter_trimmed_bases": 4596580.0,
                "pct_adapter": 8.48257265049208
            },
            "24-288_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1531162.0,
                "filtering_result_low_quality_reads": 51164.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.11451499999999999,
                "after_filtering_total_reads": 1531162.0,
                "after_filtering_total_bases": 409223893.0,
                "after_filtering_q20_bases": 375027676.0,
                "after_filtering_q30_bases": 333000899.0,
                "after_filtering_q20_rate": 0.916436,
                "after_filtering_q30_rate": 0.813738,
                "after_filtering_read1_mean_length": 267.0,
                "after_filtering_read2_mean_length": 267.0,
                "after_filtering_gc_content": 0.562734,
                "before_filtering_total_reads": 1582326.0,
                "pct_surviving": 96.76653230750173,
                "adapter_cutting_adapter_trimmed_reads": 97506.0,
                "adapter_cutting_adapter_trimmed_bases": 2027444.0,
                "pct_adapter": 6.162194136985678
            },
            "24-275_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1673478.0,
                "filtering_result_low_quality_reads": 40514.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.10735199999999999,
                "after_filtering_total_reads": 1673478.0,
                "after_filtering_total_bases": 439326257.0,
                "after_filtering_q20_bases": 401596781.0,
                "after_filtering_q30_bases": 354025417.0,
                "after_filtering_q20_rate": 0.91412,
                "after_filtering_q30_rate": 0.805837,
                "after_filtering_read1_mean_length": 262.0,
                "after_filtering_read2_mean_length": 262.0,
                "after_filtering_gc_content": 0.560707,
                "before_filtering_total_reads": 1713992.0,
                "pct_surviving": 97.6362783490238,
                "adapter_cutting_adapter_trimmed_reads": 149906.0,
                "adapter_cutting_adapter_trimmed_bases": 3353406.0,
                "pct_adapter": 8.746015150595802
            },
            "24-269_L001_R1_001": {
                "filtering_result_passed_filter_reads": 2302934.0,
                "filtering_result_low_quality_reads": 51540.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.194948,
                "after_filtering_total_reads": 2302934.0,
                "after_filtering_total_bases": 560492864.0,
                "after_filtering_q20_bases": 521238294.0,
                "after_filtering_q30_bases": 469860972.0,
                "after_filtering_q20_rate": 0.929964,
                "after_filtering_q30_rate": 0.8383,
                "after_filtering_read1_mean_length": 243.0,
                "after_filtering_read2_mean_length": 243.0,
                "after_filtering_gc_content": 0.565888,
                "before_filtering_total_reads": 2354474.0,
                "pct_surviving": 97.81097603965895,
                "adapter_cutting_adapter_trimmed_reads": 203026.0,
                "adapter_cutting_adapter_trimmed_bases": 5012952.0,
                "pct_adapter": 8.622987554757454
            },
            "24-276_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1417706.0,
                "filtering_result_low_quality_reads": 57658.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.12457900000000001,
                "after_filtering_total_reads": 1417706.0,
                "after_filtering_total_bases": 357804610.0,
                "after_filtering_q20_bases": 328089731.0,
                "after_filtering_q30_bases": 292560788.0,
                "after_filtering_q20_rate": 0.916952,
                "after_filtering_q30_rate": 0.817655,
                "after_filtering_read1_mean_length": 252.0,
                "after_filtering_read2_mean_length": 252.0,
                "after_filtering_gc_content": 0.559441,
                "before_filtering_total_reads": 1475364.0,
                "pct_surviving": 96.0919474787239,
                "adapter_cutting_adapter_trimmed_reads": 133228.0,
                "adapter_cutting_adapter_trimmed_bases": 3293571.0,
                "pct_adapter": 9.03017831531744
            },
            "24-246_L001_R1_001": {
                "filtering_result_passed_filter_reads": 666866.0,
                "filtering_result_low_quality_reads": 12134.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.0768778,
                "after_filtering_total_reads": 666866.0,
                "after_filtering_total_bases": 176201195.0,
                "after_filtering_q20_bases": 158672752.0,
                "after_filtering_q30_bases": 136656453.0,
                "after_filtering_q20_rate": 0.90052,
                "after_filtering_q30_rate": 0.775571,
                "after_filtering_read1_mean_length": 264.0,
                "after_filtering_read2_mean_length": 263.0,
                "after_filtering_gc_content": 0.560996,
                "before_filtering_total_reads": 679000.0,
                "pct_surviving": 98.21296023564065,
                "adapter_cutting_adapter_trimmed_reads": 41992.0,
                "adapter_cutting_adapter_trimmed_bases": 873172.0,
                "pct_adapter": 6.18438880706922
            },
            "24-287_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1761992.0,
                "filtering_result_low_quality_reads": 57788.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.140786,
                "after_filtering_total_reads": 1761992.0,
                "after_filtering_total_bases": 452625879.0,
                "after_filtering_q20_bases": 414709658.0,
                "after_filtering_q30_bases": 368660242.0,
                "after_filtering_q20_rate": 0.916231,
                "after_filtering_q30_rate": 0.814492,
                "after_filtering_read1_mean_length": 256.0,
                "after_filtering_read2_mean_length": 256.0,
                "after_filtering_gc_content": 0.562427,
                "before_filtering_total_reads": 1819780.0,
                "pct_surviving": 96.82445130730089,
                "adapter_cutting_adapter_trimmed_reads": 143030.0,
                "adapter_cutting_adapter_trimmed_bases": 3336030.0,
                "pct_adapter": 7.859741287408369
            },
            "24-270_L001_R1_001": {
                "filtering_result_passed_filter_reads": 2304254.0,
                "filtering_result_low_quality_reads": 60436.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.145389,
                "after_filtering_total_reads": 2304254.0,
                "after_filtering_total_bases": 595769254.0,
                "after_filtering_q20_bases": 548648672.0,
                "after_filtering_q30_bases": 489253928.0,
                "after_filtering_q20_rate": 0.920908,
                "after_filtering_q30_rate": 0.821214,
                "after_filtering_read1_mean_length": 258.0,
                "after_filtering_read2_mean_length": 258.0,
                "after_filtering_gc_content": 0.561259,
                "before_filtering_total_reads": 2364690.0,
                "pct_surviving": 97.44423159061019,
                "adapter_cutting_adapter_trimmed_reads": 186984.0,
                "adapter_cutting_adapter_trimmed_bases": 4124802.0,
                "pct_adapter": 7.907336691067328
            },
            "24-279_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1851444.0,
                "filtering_result_low_quality_reads": 55646.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.158147,
                "after_filtering_total_reads": 1851444.0,
                "after_filtering_total_bases": 467943785.0,
                "after_filtering_q20_bases": 432758485.0,
                "after_filtering_q30_bases": 388427451.0,
                "after_filtering_q20_rate": 0.924809,
                "after_filtering_q30_rate": 0.830073,
                "after_filtering_read1_mean_length": 252.0,
                "after_filtering_read2_mean_length": 252.0,
                "after_filtering_gc_content": 0.564391,
                "before_filtering_total_reads": 1907090.0,
                "pct_surviving": 97.08215134052405,
                "adapter_cutting_adapter_trimmed_reads": 146844.0,
                "adapter_cutting_adapter_trimmed_bases": 3434243.0,
                "pct_adapter": 7.699898798693297
            },
            "24-295_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1532672.0,
                "filtering_result_low_quality_reads": 58592.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.127194,
                "after_filtering_total_reads": 1532672.0,
                "after_filtering_total_bases": 409691227.0,
                "after_filtering_q20_bases": 374266368.0,
                "after_filtering_q30_bases": 331209675.0,
                "after_filtering_q20_rate": 0.913533,
                "after_filtering_q30_rate": 0.808437,
                "after_filtering_read1_mean_length": 267.0,
                "after_filtering_read2_mean_length": 267.0,
                "after_filtering_gc_content": 0.56121,
                "before_filtering_total_reads": 1591264.0,
                "pct_surviving": 96.31789571057978,
                "adapter_cutting_adapter_trimmed_reads": 90732.0,
                "adapter_cutting_adapter_trimmed_bases": 1903577.0,
                "pct_adapter": 5.701882277233696
            },
            "24-277_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1498306.0,
                "filtering_result_low_quality_reads": 33490.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.1132,
                "after_filtering_total_reads": 1498306.0,
                "after_filtering_total_bases": 389095200.0,
                "after_filtering_q20_bases": 355870530.0,
                "after_filtering_q30_bases": 314058539.0,
                "after_filtering_q20_rate": 0.91461,
                "after_filtering_q30_rate": 0.807151,
                "after_filtering_read1_mean_length": 259.0,
                "after_filtering_read2_mean_length": 259.0,
                "after_filtering_gc_content": 0.562061,
                "before_filtering_total_reads": 1531796.0,
                "pct_surviving": 97.8136775393068,
                "adapter_cutting_adapter_trimmed_reads": 106956.0,
                "adapter_cutting_adapter_trimmed_bases": 2318318.0,
                "pct_adapter": 6.982391911194441
            },
            "24-274_L001_R1_001": {
                "filtering_result_passed_filter_reads": 1710930.0,
                "filtering_result_low_quality_reads": 73040.0,
                "filtering_result_too_many_N_reads": 0.0,
                "filtering_result_too_short_reads": 0.0,
                "filtering_result_too_long_reads": 0.0,
                "pct_duplication": 0.116482,
                "after_filtering_total_reads": 1710930.0,
                "after_filtering_total_bases": 449663434.0,
                "after_filtering_q20_bases": 407856941.0,
                "after_filtering_q30_bases": 359065093.0,
                "after_filtering_q20_rate": 0.907027,
                "after_filtering_q30_rate": 0.79852,
                "after_filtering_read1_mean_length": 262.0,
                "after_filtering_read2_mean_length": 262.0,
                "after_filtering_gc_content": 0.562058,
                "before_filtering_total_reads": 1783970.0,
                "pct_surviving": 95.90576074709777,
                "adapter_cutting_adapter_trimmed_reads": 144878.0,
                "adapter_cutting_adapter_trimmed_bases": 3253300.0,
                "pct_adapter": 8.12110069115512
            }
        },
        {
            "R1.cor": {
                "s_name": "R1.cor",
                "totalpairs": 855465,
                "discardpairs": 0,
                "percdiscard": 0,
                "combopairs": 796467,
                "inniepairs": 0,
                "outiepairs": 0,
                "uncombopairs": 58998,
                "perccombo": 93.1
            }
        }
    ],
    "config_creation_date": "2024-11-28, 17:31 UTC",
    "config_git_hash": null,
    "config_intro_text": null,
    "config_report_comment": null,
    "config_report_header_info": null,
    "config_script_path": "/home/ubuntu/miniconda3/envs/multiqc",
    "config_short_version": "1.23",
    "config_subtitle": null,
    "config_title": null,
    "config_version": "1.23",
    "config_output_dir": "/home/ubuntu"
}

我希望使用 jq 从 "report_general_stats_data" 部分提取某些字段。我的预期输出类似于:

"Sample"\t"filtering_result_passed_filter_reads"\t"filtering_result_low_quality_reads"...\t"pct_adapter"
"24-240_L001_R1_001"\t"1106872.0"\t"32142.0"...\t"5.321795869058677"

其余样本依此类推。到目前为止,我已经编写了以下脚本,但没有成功:

jq -r '.report_general_stats_data[1] | @tsv' multiqc_data.json > multiqc_data.tsv

非常感谢任何帮助。

干杯,

阿尔贝托

json csv jq
1个回答
0
投票

这是我的尝试;思路是从第一个样本中获取所有键(key),再用这些键提取每个样本的数据:

# Turn the second entry of .report_general_stats_data (an object mapping
# sample name -> metrics object) into TSV: one header row followed by one
# row per sample. Run with `jq -r` so @tsv emits raw tab-separated lines.
(
    .report_general_stats_data[1]
    | to_entries
    # Fold the sample name (the entry key) into its metrics object.
    | map({"Sample": .key} * .value)
) as $records
| (
    # Column names are taken from the first record; `keys` returns them
    # in sorted order, so columns are sorted rather than in input order.
    $records[0]
    | keys
) as $header
| (
    $header,
    (
        $records[]
        | . as $fields
        # Must be $header: `$head` is not bound anywhere, and jq refuses
        # to compile a program that references an undefined variable.
        | $header
        # Look up each column by name so every row follows header order.
        | map($fields[.])
    )
)
| @tsv
Sample              adapter_cutting_adapter_trimmed_bases  adapter_cutting_adapter_trimmed_reads  after_filtering_gc_content  after_filtering_q20_bases  after_filtering_q20_rate  after_filtering_q30_bases  after_filtering_q30_rate  after_filtering_read1_mean_length  after_filtering_read2_mean_length  after_filtering_total_bases  after_filtering_total_reads  before_filtering_total_reads  filtering_result_low_quality_reads  filtering_result_passed_filter_reads  filtering_result_too_long_reads  filtering_result_too_many_N_reads  filtering_result_too_short_reads  pct_adapter        pct_duplication      pct_surviving
24-240_L001_R1_001  1262446.0                              60616.0                                0.565903                    268093576.0                0.903707                  233354655.0                0.786607                  268.0                              267.0                              296659710.0                  1106872.0                    1139014.0                     32142.0                             1106872.0                             0.0                              0.0                                0.0                               5.321795869058677  0.10114              97.1780856073762
24-278_L001_R1_001  4596580.0                              187672.0                               0.560734                    491653949.0                0.928831                  442959345.0                0.836837                  244.0                              244.0                              529325786.0                  2161572.0                    2212442.0                     50870.0                             2161572.0                             0.0                              0.0                                0.0                               8.48257265049208   0.20167800000000002  97.70073068582137
24-288_L001_R1_001  2027444.0                              97506.0                                0.562734                    375027676.0                0.916436                  333000899.0                0.813738                  267.0                              267.0                              409223893.0                  1531162.0                    1582326.0                     51164.0                             1531162.0                             0.0                              0.0                                0.0                               6.162194136985678  0.11451499999999999  96.76653230750173
24-275_L001_R1_001  3353406.0                              149906.0                               0.560707                    401596781.0                0.91412                   354025417.0                0.805837                  262.0                              262.0                              439326257.0                  1673478.0                    1713992.0                     40514.0                             1673478.0                             0.0                              0.0                                0.0                               8.746015150595802  0.10735199999999999  97.6362783490238
24-269_L001_R1_001  5012952.0                              203026.0                               0.565888                    521238294.0                0.929964                  469860972.0                0.8383                    243.0                              243.0                              560492864.0                  2302934.0                    2354474.0                     51540.0                             2302934.0                             0.0                              0.0                                0.0                               8.622987554757454  0.194948             97.81097603965895
24-276_L001_R1_001  3293571.0                              133228.0                               0.559441                    328089731.0                0.916952                  292560788.0                0.817655                  252.0                              252.0                              357804610.0                  1417706.0                    1475364.0                     57658.0                             1417706.0                             0.0                              0.0                                0.0                               9.03017831531744   0.12457900000000001  96.0919474787239
24-246_L001_R1_001  873172.0                               41992.0                                0.560996                    158672752.0                0.90052                   136656453.0                0.775571                  264.0                              263.0                              176201195.0                  666866.0                     679000.0                      12134.0                             666866.0                              0.0                              0.0                                0.0                               6.18438880706922   0.0768778            98.21296023564065
24-287_L001_R1_001  3336030.0                              143030.0                               0.562427                    414709658.0                0.916231                  368660242.0                0.814492                  256.0                              256.0                              452625879.0                  1761992.0                    1819780.0                     57788.0                             1761992.0                             0.0                              0.0                                0.0                               7.859741287408369  0.140786             96.82445130730089
24-270_L001_R1_001  4124802.0                              186984.0                               0.561259                    548648672.0                0.920908                  489253928.0                0.821214                  258.0                              258.0                              595769254.0                  2304254.0                    2364690.0                     60436.0                             2304254.0                             0.0                              0.0                                0.0                               7.907336691067328  0.145389             97.44423159061019
24-279_L001_R1_001  3434243.0                              146844.0                               0.564391                    432758485.0                0.924809                  388427451.0                0.830073                  252.0                              252.0                              467943785.0                  1851444.0                    1907090.0                     55646.0                             1851444.0                             0.0                              0.0                                0.0                               7.699898798693297  0.158147             97.08215134052405
24-295_L001_R1_001  1903577.0                              90732.0                                0.56121                     374266368.0                0.913533                  331209675.0                0.808437                  267.0                              267.0                              409691227.0                  1532672.0                    1591264.0                     58592.0                             1532672.0                             0.0                              0.0                                0.0                               5.701882277233696  0.127194             96.31789571057978
24-277_L001_R1_001  2318318.0                              106956.0                               0.562061                    355870530.0                0.91461                   314058539.0                0.807151                  259.0                              259.0                              389095200.0                  1498306.0                    1531796.0                     33490.0                             1498306.0                             0.0                              0.0                                0.0                               6.982391911194441  0.1132               97.8136775393068
24-274_L001_R1_001  3253300.0                              144878.0                               0.562058                    407856941.0                0.907027                  359065093.0                0.79852                   262.0                              262.0                              449663434.0                  1710930.0                    1783970.0                     73040.0                             1710930.0                             0.0                              0.0                                0.0                               8.12110069115512   0.116482             95.90576074709777
© www.soinside.com 2019 - 2024. All rights reserved.