使用 SheetJS 库和 Google Apps 脚本将数据从 Excel 转换为 CSV 时出错

问题描述 投票:0回答:1

我在 Google Drive 文件夹中有 6 个文件(5 个 xlsx、1 个 csv),我想将它们分别导入到同一个 Google 表格的 6 个不同工作表(选项卡)中。Tanaike 在这里(here)提出了一个解决方案,在文件不大时该方案效果很好。不幸的是,我的每个文件都有数万行(大约 50000 行),文件大小为 4 MB–5 MB。这是脚本:

/**
 * Copies the first sheet of an XLSX file into a Google Sheets tab.
 *
 * Uses the MicrosoftDocsApp library
 * (library key: 1B0eoHz03BVtSZhJAocaGNq94RjoXocz8xGMaLzwVdmAvYW5k8s5Yd360),
 * which exposes the XLSX file as a temporary Google Spreadsheet.
 *
 * @param {Object} file - Source XLSX file; only `getId()` is called on it.
 * @param {Object} sheet - Destination sheet; values are written starting at A1.
 */
function importExcel1(file, sheet) {
  const MD = MicrosoftDocsApp.setFileId(file.getId());
  const srcSS = MD.getSpreadsheet();

  let values;
  try {
    // Retrieve every value of the first sheet of the converted file.
    values = srcSS.getSheets()[0].getDataRange().getValues();
  } finally {
    // MD.end() removes the spreadsheet that was created as a temporary file
    // from the XLSX file. Running it in `finally` keeps a failed read from
    // leaving that temporary file behind.
    MD.end();
  }

  if (values.length > 0) {
    sheet.getRange(1, 1, values.length, values[0].length).setValues(values);
  }
}

它可以快速从 Excel 文件获取数据,但在选项卡中设置值时出现以下错误

函数 (importExcel1):错误:超过最大执行时间(Exceeded maximum execution time)

我尝试了另一种方法:使用 SheetJS 库(代码改编自 Tanaike 的解决方案)。

 /**
 * Imports each file of one Drive folder into its matching tab of the active
 * spreadsheet.
 *
 * The destination is chosen from the first six characters of the file name
 * (lower-cased). Files whose name contains "Done", files with an unknown
 * prefix, "packag*" files that are not `.csv`, and files whose destination
 * tab does not exist are skipped. CSV files are handed to `importCSV`, all
 * other files to `convertExcelToCSV` (both defined elsewhere in this script).
 */
function importFiles() {
  const folderId = 'folder ID'; // ID of the folder where files are stored
  const ss = SpreadsheetApp.getActiveSpreadsheet();

  // File-name prefix -> destination tab and the column range cleared before
  // the import. `csvOnly` marks prefixes that are accepted for .csv files only.
  const targets = new Map([
    ['parcel', { sheetName: 'import - shipping', range: 'A:T' }],
    ['kit-re', { sheetName: 'import - kitting', range: 'A:G' }],
    ['req-re', { sheetName: 'import - orders', range: 'A:U' }],
    ['billin', { sheetName: 'import - billing codes', range: 'A:G' }],
    ['req_li', { sheetName: 'import - order line', range: 'A:G' }],
    ['packag', { sheetName: 'Manual Extract - Package Grid', range: 'A:AO', csvOnly: true }],
  ]);

  const files = DriveApp.getFolderById(folderId).getFiles();
  while (files.hasNext()) {
    const file = files.next();
    const fileName = file.getName();

    // Skip files that already have "Done" in the name
    if (fileName.includes('Done')) {
      console.log(`Skipping already processed file: ${fileName}`);
      continue;
    }

    // The target is resolved afresh for every file. The previous version kept
    // `sheet`/`range` in function-scoped `var`s that were never reset, so a
    // skipped "packag" file silently reused the preceding file's tab.
    const target = targets.get(fileName.slice(0, 6).toLowerCase());
    if (!target) {
      continue; // Skip files that do not match the criteria
    }
    if (target.csvOnly && !fileName.endsWith('.csv')) {
      continue;
    }

    const sheet = ss.getSheetByName(target.sheetName);
    if (!sheet) {
      continue;
    }

    sheet.getRange(target.range).clearContent();
    SpreadsheetApp.flush();
    console.log(`Sheet: ${sheet.getName()}`);
    console.log(`File: ${fileName}`);
    if (file.getMimeType() === MimeType.CSV) {
      importCSV(file, sheet);
    } else {
      convertExcelToCSV(file, sheet);
    }

    // Renaming processed files is currently disabled:
    // file.setName(fileName + ' Done');
    // console.log(`Processed and renamed file: ${fileName} to ${file.getName()}`);
  }
}

/**
 * Pastes comma-delimited text into a sheet with a single Sheets API
 * `pasteData` batch-update request.
 *
 * @param {string|Object} dataOrFile - Either the CSV text itself, or an object
 *     whose `getBlob().getDataAsString()` yields the CSV text.
 * @param {Object} sheet - Destination sheet; only `getSheetId()` is called.
 */
function importCSV(dataOrFile, sheet) {
  // A string argument is already CSV text; anything else is read as a file.
  const csvText =
    typeof dataOrFile === 'string'
      ? dataOrFile
      : dataOrFile.getBlob().getDataAsString();

  const targetSheetId = sheet.getSheetId();
  console.log(targetSheetId);

  const pasteRequest = {
    pasteData: {
      data: csvText,
      coordinate: { sheetId: targetSheetId },
      delimiter: ",",
    },
  };

  // NOTE(review): 'Spreadsheet ID' looks like a placeholder for the ID of the
  // destination spreadsheet — confirm it is replaced before running.
  Sheets.Spreadsheets.batchUpdate({ requests: [pasteRequest] }, 'Spreadsheet ID');
}

/**
 * Reads an Excel file with the SheetJS `XLSX` library, renders its first
 * worksheet as CSV text and passes that text to `importCSV`.
 *
 * @param {Object} file - Source Excel file; only `getId()` is called on it.
 * @param {Object} sheet - Destination sheet, forwarded unchanged to `importCSV`.
 */
function convertExcelToCSV(file, sheet) {
  // Fetch the file content from Drive as raw bytes.
  const rawBytes = DriveApp.getFileById(file.getId()).getBlob().getBytes();

  // SheetJS is given the bytes as a typed array (`type: 'array'`).
  const workbook = XLSX.read(new Uint8Array(rawBytes), { type: 'array' });

  // Only the first worksheet of the workbook is exported.
  const worksheet = workbook.Sheets[workbook.SheetNames[0]];
  const csvText = XLSX.utils.sheet_to_csv(worksheet);

  importCSV(csvText, sheet);
}

我不确定它是否能更有效地获取数据,因为我不断收到此错误:

错误:无效列 -1(Error: invalid column -1)

encode_col @ SheetJS.gs:4454

sheet_to_csv @ SheetJS.gs:27507

有没有其他方法可以让我们更可靠地发出 batch 请求?非常感谢任何有助于解决此错误的帮助!

excel google-sheets google-apps-script import-from-excel sheetjs
1个回答
0
投票

根据您的情况,进行以下修改如何?

在此修改后的脚本中,运行以下步骤。

  1. 将 XLSX 或 CSV 数据转换为 Google 电子表格。
  2. 使用 Sheets API 从转换后的电子表格中检索值。
  3. 将值放入目标电子表格中的每个工作表。
  4. 删除转换后的电子表格。

在使用此脚本之前,请在高级 Google 服务中启用 Drive API v3 和 Sheets API。并且,请设置您的文件夹 ID。

我猜在你的情况下,你想从每个 XLSX 中的第一张表中检索值。

修改后的脚本1:

在此脚本中,先检索所有文件的值,然后通过一次请求将这些值写入各个工作表。

/**
 * Imports each file of one Drive folder into its matching tab of the active
 * spreadsheet, writing all tabs with a single Sheets API request.
 *
 * Each file is copied to a temporary Google Spreadsheet, its values are read
 * through the Sheets API, and the temporary copy is removed again. After every
 * file has been read, the collected values are written in one
 * `Values.batchUpdate` call.
 *
 * Requires the Drive API v3 and Sheets API advanced Google services.
 *
 * The destination is chosen from the first six characters of the file name
 * (lower-cased). Files whose name contains "Done", files with an unknown
 * prefix, "packag*" files that are not `.csv`, and files whose destination
 * tab does not exist are skipped.
 */
function importFiles() {
  const folderId = '###'; // ID of the folder where files are stored
  const ss = SpreadsheetApp.getActiveSpreadsheet();

  // File-name prefix -> destination tab and the column range cleared before
  // the import. `csvOnly` marks prefixes that are accepted for .csv files only.
  const targets = new Map([
    ['parcel', { sheetName: 'import - shipping', range: 'A:T' }],
    ['kit-re', { sheetName: 'import - kitting', range: 'A:G' }],
    ['req-re', { sheetName: 'import - orders', range: 'A:U' }],
    ['billin', { sheetName: 'import - billing codes', range: 'A:G' }],
    ['req_li', { sheetName: 'import - order line', range: 'A:G' }],
    ['packag', { sheetName: 'Manual Extract - Package Grid', range: 'A:AO', csvOnly: true }],
  ]);

  // Alternative that lists only Excel/CSV files (note the parentheses around
  // the `or` clause):
  // DriveApp.getFolderById(folderId).searchFiles(`(mimeType='${MimeType.MICROSOFT_EXCEL}' or mimeType='${MimeType.CSV}') and trashed=false`);
  const files = DriveApp.getFolderById(folderId).getFiles();

  const data = [];
  while (files.hasNext()) {
    const file = files.next();
    const fileName = file.getName();

    // Skip files that already have "Done" in the name
    if (fileName.includes('Done')) {
      console.log(`Skipping already processed file: ${fileName}`);
      continue;
    }

    // The target is resolved afresh for every file. The previous version kept
    // `sheet`/`range` in function-scoped `var`s that were never reset, so a
    // skipped "packag" file silently reused the preceding file's tab.
    const target = targets.get(fileName.slice(0, 6).toLowerCase());
    if (!target) {
      continue; // Skip files that do not match the criteria
    }
    if (target.csvOnly && !fileName.endsWith('.csv')) {
      continue;
    }

    const sheet = ss.getSheetByName(target.sheetName);
    if (!sheet) {
      continue;
    }

    sheet.getRange(target.range).clearContent();
    SpreadsheetApp.flush();
    console.log(`Sheet: ${sheet.getName()}`);
    console.log(`File: ${fileName}`);

    // Convert the file to a temporary Google Spreadsheet and read its values.
    const id = Drive.Files.copy({ mimeType: MimeType.GOOGLE_SHEETS }, file.getId()).id;
    let values;
    try {
      ({ values } = Sheets.Spreadsheets.Values.get(id, 'A:ZZZ'));
    } finally {
      // Always delete the temporary copy, even when reading it failed.
      Drive.Files.remove(id);
    }

    // `values` is absent when the converted sheet holds no data; the tab has
    // already been cleared, so there is nothing to write for it.
    if (values && values.length > 0) {
      data.push({ values, range: sheet.getName() });
    }
  }

  if (data.length === 0) return;
  Sheets.Spreadsheets.Values.batchUpdate({ data, valueInputOption: 'USER_ENTERED' }, ss.getId());
}

修改后的脚本2:

在此脚本中,每个文件的值在 while 循环内读取后,会立即写入对应的工作表。

/**
 * Imports each file of one Drive folder into its matching tab of the active
 * spreadsheet, writing every tab as soon as its file has been read.
 *
 * Each file is copied to a temporary Google Spreadsheet, its values are read
 * through the Sheets API, the temporary copy is removed, and the values are
 * written to the destination tab with `Values.update`.
 *
 * Requires the Drive API v3 and Sheets API advanced Google services.
 *
 * The destination is chosen from the first six characters of the file name
 * (lower-cased). Files whose name contains "Done", files with an unknown
 * prefix, "packag*" files that are not `.csv`, and files whose destination
 * tab does not exist are skipped.
 */
function importFiles() {
  const folderId = '###'; // ID of the folder where files are stored
  const ss = SpreadsheetApp.getActiveSpreadsheet();

  // File-name prefix -> destination tab and the column range cleared before
  // the import. `csvOnly` marks prefixes that are accepted for .csv files only.
  const targets = new Map([
    ['parcel', { sheetName: 'import - shipping', range: 'A:T' }],
    ['kit-re', { sheetName: 'import - kitting', range: 'A:G' }],
    ['req-re', { sheetName: 'import - orders', range: 'A:U' }],
    ['billin', { sheetName: 'import - billing codes', range: 'A:G' }],
    ['req_li', { sheetName: 'import - order line', range: 'A:G' }],
    ['packag', { sheetName: 'Manual Extract - Package Grid', range: 'A:AO', csvOnly: true }],
  ]);

  const files = DriveApp.getFolderById(folderId).getFiles();
  while (files.hasNext()) {
    const file = files.next();
    const fileName = file.getName();

    // Skip files that already have "Done" in the name
    if (fileName.includes('Done')) {
      console.log(`Skipping already processed file: ${fileName}`);
      continue;
    }

    // The target is resolved afresh for every file. The previous version kept
    // `sheet`/`range` in function-scoped `var`s that were never reset, so a
    // skipped "packag" file silently reused the preceding file's tab.
    const target = targets.get(fileName.slice(0, 6).toLowerCase());
    if (!target) {
      continue; // Skip files that do not match the criteria
    }
    if (target.csvOnly && !fileName.endsWith('.csv')) {
      continue;
    }

    const sheet = ss.getSheetByName(target.sheetName);
    if (!sheet) {
      continue;
    }

    sheet.getRange(target.range).clearContent();
    SpreadsheetApp.flush();
    console.log(`Sheet: ${sheet.getName()}`);
    console.log(`File: ${fileName}`);

    // Convert the file to a temporary Google Spreadsheet and read its values.
    const id = Drive.Files.copy({ mimeType: MimeType.GOOGLE_SHEETS }, file.getId()).id;
    let values;
    try {
      ({ values } = Sheets.Spreadsheets.Values.get(id, 'A:ZZZ'));
    } finally {
      // Always delete the temporary copy. Previously it was left behind when
      // the file was empty or when reading it failed.
      Drive.Files.remove(id);
    }

    // `values` is absent when the converted sheet holds no data.
    if (!values || values.length === 0) {
      continue;
    }

    Sheets.Spreadsheets.Values.update({ values }, ss.getId(), sheet.getName(), { valueInputOption: 'USER_ENTERED' });
    // Utilities.sleep(3000); // This line might be required to be used.
  }
}

参考资料:

© www.soinside.com 2019 - 2024. All rights reserved.