使用 ScriptProperties“批处理”时尝试/捕获函数不一致

问题描述 投票:0回答:1

所以,这篇文章是我 10 个月前发表的一篇文章的延续。我将在本文中多次引用它,但这里有一个可返回的链接:导入大量 API 数据

首先,我一直在开发一个数据库,它从数千个 API/JSON 链接中提取数据。我单独尝试过很多次,结果不是代码出错、超时,就是彻底失败。于是我发帖求助,这也是我开始尝试使用“批处理”的原因。下面是当时得到的代码,我只做了一些小调整以适应我的数据(没有改动“批处理”部分),它当时是可以正常工作的:

// Script-level property store; dataImport() and checkBatch() below read and write
// the "batchNumber" / "processedBatches" properties through this handle.
var scriptProperties = PropertiesService.getScriptProperties();

/**
 * Fetches every URL listed in column A of the "Import1" sheet in batches and
 * appends one row per HTTP 200 response to the "Data1" sheet.
 *
 * The "batchNumber" script property selects the batch index the loop starts
 * from, and "processedBatches" holds the index of the latest batch whose fetch
 * succeeded. Both are read at the start and rewritten inside the loop.
 *
 * NOTE(review): the sheet is written only once, after the loop has ended, while
 * "processedBatches" is stored inside the loop. If the execution is cut off
 * before the loop ends, the property has advanced but no rows were written.
 */
function dataImport() {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getSheetByName("Import1");
  var exportSheet = ss.getSheetByName("Data1");
  // Build one request object per non-empty cell of A2:A<last row>.
  var reqs = sheet.getRange("A2:A" + sheet.getLastRow()).getDisplayValues().reduce((ar, [url]) => {
    if (url) {
      ar.push({ url, muteHttpExceptions: true });
    }
    return ar;
  }, []);

  // Responses collected during this execution, one array per fetched batch.
  var bucket = [];
  var batchSize = 200;
  var batches = batchArray(reqs, batchSize);
  // Both properties default to 0 when they have never been stored.
  var startingBatch = scriptProperties.getProperty("batchNumber") == null ? 0 : parseInt(scriptProperties.getProperty("batchNumber"));
  var processedBatches = scriptProperties.getProperty("processedBatches") == null ? 0 : parseInt(scriptProperties.getProperty("processedBatches"));

  console.log(`Total: ${reqs.length}.\n${batches.length} batches.`)

  // NOTE(review): with the default of 0 this condition is already true when there
  // is exactly one batch, so a single-batch import is reported as done without running.
  if (processedBatches >= (batches.length - 1)) {
    console.log('All data has been processed already.');
  } else {
    // Start from the batch index stored in "batchNumber" and continue to the last
    // batch; there is no per-execution limit on how many batches are fetched.
    for (let i = startingBatch; i < batches.length; i++) {
      console.log(`Processing batch index #${parseInt(i)}`);
      try {
        var responses = UrlFetchApp.fetchAll(batches[i]);
        bucket.push(responses);
        // Remove previous batch index number
        scriptProperties.deleteProperty("processedBatches");
        // Store latest successful batch index number
        scriptProperties.setProperty("processedBatches", parseInt(i));
      }
      // Runs only when something in the try block throws; this is the only place
      // "batchNumber" is written, so a run without exceptions leaves it unchanged.
      // NOTE(review): every request sets muteHttpExceptions, so HTTP error statuses
      // presumably do not throw here - confirm against the UrlFetchApp documentation.
      catch (e) {
        // Remove the old batch number to be replaced with new batch number.
        scriptProperties.deleteProperty("batchNumber");
        // Remember the batch that encountered an error so the next call starts from it.
        scriptProperties.setProperty("batchNumber", parseInt(i));
        console.log(`Batch index #${parseInt(i)} stopped`);
        break;
      }
    }

    // Flatten the per-batch response arrays into one list of responses.
    const initialRes = [].concat.apply([], bucket);

    // Keep only HTTP 200 responses and turn each JSON body into one sheet row.
    var temp = initialRes.reduce((ar, r) => {
      if (r.getResponseCode() == 200) {
        // The nested patterns throw a TypeError when position, team, birthPlace or
        // experience is missing from the parsed body.
        var { id, firstName, lastName, fullName, displayName, shortName, weight, height, position: { abbreviation }, team: { $ref }, birthPlace: { city }, birthPlace: { state, country }, experience: { years, displayValue }, jersey, active } = JSON.parse(r.getContentText());
        ar.push([id, firstName, lastName, fullName, displayName, shortName, weight, height, abbreviation, $ref, city, state, country, years, displayValue, jersey, active]);
      }
      return ar;
    }, []);
    var res = [...temp];

    // Add the header row only when the export sheet is still empty.
    exportSheet.getLastRow() == 0 && exportSheet.appendRow(['IDs', 'First Name', 'Last Name', 'Full Name', 'Display Name', 'Short Name', 'Weight', 'Height', 'Position', 'Team URL', 'City', 'State', 'Country', 'Years', 'Class', 'jersey', 'Active']);

    // Append the collected rows below the last used row; skipped when there are none.
    var result = () => {
      return temp.length != 0 && exportSheet.getRange(exportSheet.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
    }
    result() && console.log(`Processed: ${res.length}.`);
  }
}

/**
 * Splits an array into consecutive chunks of at most `batchSize` elements.
 * The last chunk holds the remainder and may be shorter; an empty input
 * yields an empty result. The input array is not modified.
 *
 * @param {Array} arr - Items to split (here: the list of fetch requests).
 * @param {number} batchSize - Maximum number of items per chunk; a positive integer.
 * @returns {Array<Array>} The chunks, in the original order.
 * @throws {RangeError} If `batchSize` is not a positive integer.
 */
function batchArray(arr, batchSize) {
  // A step of zero or less would never advance the loop below (infinite loop),
  // and NaN or a string would produce nonsense chunks, so reject them up front.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`);
  }

  var batches = [];

  for (var i = 0; i < arr.length; i += batchSize) {
    batches.push(arr.slice(i, i + batchSize));
  }

  return batches;
}

/**
 * Optional helper: logs how many batches have been processed so far.
 *
 * dataImport() stores the index of the latest successfully fetched batch in the
 * "processedBatches" script property, so the count is that index plus one.
 * When the property is missing or not numeric, 0 is reported instead of "NaN".
 */
function checkBatch() {
  const stored = scriptProperties.getProperty("processedBatches");
  const lastIndex = stored == null ? NaN : parseInt(stored, 10);
  const processedCount = Number.isNaN(lastIndex) ? 0 : lastIndex + 1;
  console.log(`${processedCount} batches have been processed.`);
}

此代码应该逐批处理数据,然后把每个批次分别写入“导入数据”表。每次运行代码时,它都会递增 batchNumber 和 processedBatches 这两个 ScriptProperties。我会在 Apps 脚本中添加一个触发器,在所有批次处理完之前每分钟运行一次代码。保存这些属性的目的,本质上是保存批处理的工作进度,以避免代码超时。正常工作时,每次运行大约需要 10-15 秒,每分钟一次的触发器最终会把所有批次都写入。原帖中的答案更详细地介绍了它的工作原理,还附有 GIF 演示,所以如果您想了解细节,它比我解释得更好!

快进到现在,我希望用最新信息更新我的数据,于是尝试使用之前用过的同一份代码。令我沮丧的是,之前有效的“批处理”功能无法正常运行了。除了“不一致”之外,很难用别的词来描述它,因为我几乎没有改动代码,它却表现出多种不同的行为。有时,它会陷入处理循环:不断执行 try/catch 中的“try”部分,把一批又一批标记为“处理中”,但实际上并没有写入任何数据。处理会一直持续到代码本身超时,并且只会更新 processedBatches 属性,而不会更新 batchNumber 属性。这是我最常遇到的情况。其他时候,它处理完当前所在的批次就停止了,只运行一次,也不写入任何数据。在极少数情况下,它会成功写入一批数据,但我无法让它连续两次做到这一点。通常,当某一批确实写入工作表之后,下次运行时我又会陷入处理循环。一般来说,只要上一次尝试没有成功,我就会尝试各种操作,包括把两个属性设置为零,或者删除属性来重置代码,但都没有成功。另外作为参考:以前代码正常运行时,我不需要手动调整属性,直到所有批次都写入完毕后才重置它们;而在目前的尝试中,还没有哪一次接近这种结果。

我不确定该从哪里着手解决这个问题。当初把它拼凑起来时我就已经感到迷茫,现在更不知道它为什么停止工作了。我确实在代码中添加了额外的 console.log 来尝试查找可能出错的位置,也正是这样我才发现“catch”的处理不一致。

作为参考,这是我目前使用的代码的最新版本,可以看出大部分内部结构并没有改动。事实上,我甚至把“批处理”部分重新粘贴回代码中,试图让它恢复正常:

// Script-level property store; dataImport() and checkBatch() below read and write
// the "batchNumber" / "processedBatches" properties through this handle.
var scriptProperties = PropertiesService.getScriptProperties();

/**
 * Fetches every URL listed in column A of the "Base JSON Import" sheet in
 * batches and appends one row per HTTP 200 response to the "Import Data" sheet.
 *
 * The "batchNumber" script property selects the batch index the loop starts
 * from, and "processedBatches" holds the index of the latest batch whose fetch
 * succeeded. Both are read at the start and rewritten inside the loop; the
 * extra console.log calls trace each of those steps.
 *
 * NOTE(review): the sheet is written only once, after the loop has ended, while
 * "processedBatches" is stored inside the loop. If the execution is cut off
 * before the loop ends, the property has advanced but no rows were written.
 */
function dataImport() {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getSheetByName("Base JSON Import");
  var exportSheet = ss.getSheetByName("Import Data");
  // Build one request object per non-empty cell of A2:A<last row>.
  var reqs = sheet.getRange("A2:A" + sheet.getLastRow()).getDisplayValues().reduce((ar, [url]) => {
    if (url) {
      ar.push({ url, muteHttpExceptions: true });
    }
    return ar;
  }, []);

  // Responses collected during this execution, one array per fetched batch.
  var bucket = [];
  var batchSize = 200;
  var batches = batchArray(reqs, batchSize);
  // Both properties default to 0 when they have never been stored.
  var startingBatch = scriptProperties.getProperty("batchNumber") == null ? 0 : parseInt(scriptProperties.getProperty("batchNumber"));
  var processedBatches = scriptProperties.getProperty("processedBatches") == null ? 0 : parseInt(scriptProperties.getProperty("processedBatches"));

  console.log(`Total: ${reqs.length}.\n${batches.length} batches.`)

  // NOTE(review): with the default of 0 this condition is already true when there
  // is exactly one batch, so a single-batch import is reported as done without running.
  if (processedBatches >= (batches.length - 1)) {
    console.log('All data has been processed already.');
  } else {
    // Start from the batch index stored in "batchNumber" and continue to the last
    // batch; there is no per-execution limit on how many batches are fetched.
    for (let i = startingBatch; i < batches.length; i++) {
      console.log(`Processing batch index #${parseInt(i)}`);
      try {
        var responses = UrlFetchApp.fetchAll(batches[i]);
        bucket.push(responses);
        console.log(`Fetched batches, pushed responses`);
        // Remove previous batch index number
        scriptProperties.deleteProperty("processedBatches");
        console.log(`Removed previous batch index number`);
        // Store latest successful batch index number
        scriptProperties.setProperty("processedBatches", parseInt(i));
        console.log(`Stored latest succcessful batch index number`);
      }
      // Runs only when something in the try block throws; this is the only place
      // "batchNumber" is written, so a run without exceptions leaves it unchanged.
      // NOTE(review): every request sets muteHttpExceptions, so HTTP error statuses
      // presumably do not throw here - confirm against the UrlFetchApp documentation.
      catch (e) {
        console.log(`Catching the last batch index number`);
        // Remove the old batch number to be replaced with new batch number.
        scriptProperties.deleteProperty("batchNumber");
        console.log(`Removed old batch number`);
        // Remember the batch that encountered an error so the next call starts from it.
        scriptProperties.setProperty("batchNumber", parseInt(i));
        console.log(`Remember the last batch number`);
        console.log(`Batch index #${parseInt(i)} stopped`);
        break;
      }
    }

    // Flatten the per-batch response arrays into one list of responses.
    const initialRes = [].concat.apply([], bucket);

    // Keep only HTTP 200 responses and turn each JSON body into one sheet row.
    var temp = initialRes.reduce((ar, r) => {
      if (r.getResponseCode() == 200) {
        var { id, firstName, lastName, fullName, displayName, shortName, weight, height, position, dateOfBirth, hand, jersey, debutYear, birthPlace, experience, active } = JSON.parse(r.getContentText());
          // Nested fields fall back to null when the parent object is missing or
          // falsy, so an incomplete record still produces a full-width row.
          var { abbreviation = null } = position || {};
          var { displayValue = null } = hand || {};
          var { city = null, state = null, country = null } = birthPlace || {};
          var { years = null } = experience || {};
        ar.push([id, firstName, lastName, fullName, displayName, shortName, weight, height, abbreviation, dateOfBirth, displayValue, jersey, debutYear, city, state, country, years, active]);
      }
      return ar;
    }, []);
    var res = [...temp];

    // Add the header row only when the export sheet is still empty.
    exportSheet.getLastRow() == 0 && exportSheet.appendRow(['IDs', 'First Name', 'Last Name', 'Full Name', 'Display Name', 'Short Name', 'Weight', 'Height', 'Position', 'DOB', 'Hand', 'Jersey', 'Debut Year', 'City', 'State', 'Country', 'Years', 'Active']);

    // Append the collected rows below the last used row; skipped when there are none.
    var result = () => {
      return temp.length != 0 && exportSheet.getRange(exportSheet.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
    }
    result() && console.log(`Processed: ${res.length}.`);
  }
}

/**
 * Chunks the request list (or any array) into batches of at most `batchSize`
 * items each, preserving order. The final batch may be shorter; an empty
 * array produces no batches. The input array is left untouched.
 *
 * @param {Array} arr - Items to chunk.
 * @param {number} batchSize - Maximum items per batch; must be a positive integer.
 * @returns {Array<Array>} The batches, in input order.
 * @throws {RangeError} If `batchSize` is not a positive integer.
 */
function batchArray(arr, batchSize) {
  // Guard against sizes that previously caused an endless loop (0 or negative)
  // or meaningless chunks (NaN, strings, fractions).
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`);
  }

  const batchCount = Math.ceil(arr.length / batchSize);

  return Array.from({ length: batchCount }, (_, batchIndex) => {
    const start = batchIndex * batchSize;
    return arr.slice(start, start + batchSize);
  });
}

/**
 * Optional helper that reports the number of batches processed so far.
 *
 * The "processedBatches" script property holds the index of the latest batch
 * that dataImport() fetched successfully, hence the +1 to turn it into a count.
 * A missing or non-numeric property is reported as 0 batches rather than "NaN".
 */
function checkBatch() {
  const rawValue = scriptProperties.getProperty("processedBatches");
  let batchesDone = 0;
  if (rawValue != null) {
    const lastBatchIndex = parseInt(rawValue, 10);
    if (!Number.isNaN(lastBatchIndex)) {
      batchesDone = lastBatchIndex + 1;
    }
  }
  console.log(`${batchesDone} batches have been processed.`);
}

在我的搜索中,我还没有找到导入 API 链接的好例子,但我仍然没有找到任何可以解决将“批处理”与 try/catch 配对时出现的不一致问题的内容。所以我现在来这里寻求帮助。如果可以更轻松地发现问题,我已附上测试表,并且非常感谢我能获得的任何帮助!另外,如果需要或想要更多详细信息,请告诉我。

测试表:https://docs.google.com/spreadsheets/d/1yb-AeTTv7Z5pJNsc13s90bmT3uIOHSovswVzLwEQVkM/edit?usp=sharing

javascript google-sheets google-apps-script try-catch batch-processing
1个回答
0
投票

您有一个用于导入 12,000+ 个 URL 数据的脚本;该脚本使用复杂的批处理方法,但没有机制可以避免超时错误。

考虑这个答案;它与 OP 的代码非常相似,但有几个关键区别:

  • 单个脚本属性:
    lastBatchNumProcessed
  • 两个脚本变量:
    • batchesPerSession
      - 限制每次执行要处理的批次数量,并且
    • batchCounter
      - 计算已处理的批次
  • TRY
    代码新增了一个判断,用于检查本次执行已处理的批次数量:
    • if (batchCounter == batchesPerSession){
    • 如果为 true,则 break(停止处理);如果为 false,则继续处理。
  • CATCH
    代码不会更改 Script 属性;代码简化了
  • batchArray()
    不变

// Script-level property store; dataImport() below reads and writes the
// "lastBatchNumProcessed" property through this handle.
var scriptProperties = PropertiesService.getScriptProperties();

/**
 * Imports player data from the URLs listed in column A of the
 * "Base JSON Import" sheet and appends it to the "ImportData" sheet.
 *
 * At most `batchesPerSession` batches of `batchSize` URLs are fetched per
 * execution so that a single run stays short. The "lastBatchNumProcessed"
 * script property records how many batches are complete, so the next run
 * (for example from a time-driven trigger) resumes where this one stopped.
 *
 * Progress is persisted only after the fetched rows have been written to the
 * sheet: if the run fails or is cut off before the write, the next run
 * fetches the same batches again instead of silently skipping them.
 * Responses that are not HTTP 200, or whose body cannot be parsed, are skipped.
 */
function dataImport() {
  const ss = SpreadsheetApp.getActiveSpreadsheet();
  const sheet = ss.getSheetByName("Base JSON Import");
  const exportSheet = ss.getSheetByName("ImportData");

  // One request per non-empty cell of column A (row 2 down to the last row).
  const reqs = sheet.getRange("A2:A" + sheet.getLastRow()).getDisplayValues().reduce((ar, [url]) => {
    if (url) {
      ar.push({ url, muteHttpExceptions: true });
    }
    return ar;
  }, []);

  // Responses fetched during this execution, one array per batch.
  const bucket = [];
  const batchSize = 200;
  const batches = batchArray(reqs, batchSize);
  Logger.log("DEBUG: FYI-Number of records = "+reqs.length+", batch size = "+batchSize+", so target number of row in batches = "+batches.length);

  // Limit of batches to process per execution, to avoid a timeout.
  const batchesPerSession = 10;
  let batchCounter = 0;

  // Number of batches completed by earlier executions (0 on the first run).
  const storedBatchNum = scriptProperties.getProperty("lastBatchNumProcessed");
  const lastBatchNumProcessed = storedBatchNum == null ? 0 : parseInt(storedBatchNum, 10);
  Logger.log("DEBUG: Last batch number processed =  = "+lastBatchNumProcessed);

  // Nothing left to do once every batch has been processed.
  if (lastBatchNumProcessed >= batches.length) {
    Logger.log('DEBUG: All data has been processed already.');
    return;
  }

  // Resume at the first batch that has not been completed yet.
  for (let i = lastBatchNumProcessed; i < batches.length; i++) {
    Logger.log("DEBUG: i:"+i+", batch#"+(+i+1)+", rows in batch = "+batches[i].length);

    try {
      bucket.push(UrlFetchApp.fetchAll(batches[i]));
      Logger.log("DEBUG: Fetched responses for i="+i+" batch#"+(i + 1)+" pushed responses onto bucket, number of values in the bucket = "+bucket.length);
    } catch (e) {
      // Stop at the failing batch; batches fetched before it are still written
      // below, and the failing batch is retried by the next execution.
      Logger.log(`DEBUG: Catching error`);
      Logger.log("DEBUG: Catch: " + e);
      Logger.log(`DEBUG: Catch: Batch index #${parseInt(i, 10)} stopped`);
      break;
    }

    // Count the batches fetched and stop once the per-execution limit is reached.
    batchCounter = batchCounter + 1;
    if (batchCounter == batchesPerSession) {
      break;
    }
  }

  if (batchCounter == 0) {
    // The very first fetch of this execution failed, so there is nothing to write.
    Logger.log("DEBUG: No Batches processed.");
    return;
  }

  // Flatten the per-batch response arrays into one list of responses.
  const responses = [].concat(...bucket);

  // Keep only HTTP 200 responses and turn each JSON body into one sheet row.
  const rows = responses.reduce((ar, r) => {
    if (r.getResponseCode() !== 200) {
      return ar;
    }
    try {
      const { id, firstName, lastName, fullName, displayName, shortName, weight, height, position, dateOfBirth, hand, jersey, debutYear, birthPlace, experience, active } = JSON.parse(r.getContentText());
      // Nested fields fall back to null when the parent object is missing.
      const { abbreviation = null } = position || {};
      const { displayValue = null } = hand || {};
      const { city = null, state = null, country = null } = birthPlace || {};
      const { years = null } = experience || {};
      ar.push([id, firstName, lastName, fullName, displayName, shortName, weight, height, abbreviation, dateOfBirth, displayValue, jersey, debutYear, city, state, country, years, active]);
    } catch (parseError) {
      // One malformed body must not abort the whole import (and, because progress
      // is saved after the write, block every later run on the same batch).
      Logger.log("DEBUG: Skipping response that could not be parsed: " + parseError);
    }
    return ar;
  }, []);

  // Add the header row only when the export sheet is still empty.
  exportSheet.getLastRow() == 0 && exportSheet.appendRow(['IDs', 'First Name', 'Last Name', 'Full Name', 'Display Name', 'Short Name', 'Weight', 'Height', 'Position', 'DOB', 'Hand', 'Jersey', 'Debut Year', 'City', 'State', 'Country', 'Years', 'Active']);

  if (rows.length != 0) {
    // Build the target range only when there is at least one row; rows[0] is
    // undefined and a zero-row range is invalid otherwise.
    const target = exportSheet.getRange(exportSheet.getLastRow() + 1, 1, rows.length, rows[0].length);
    Logger.log("DEBUG: target range = "+target.getA1Notation()+", Number of rows = "+rows.length+", number of columns = "+rows[0].length);
    target.setValues(rows);
    Logger.log(`DEBUG: number of records processed: ${rows.length}.`);
  } else {
    Logger.log("DEBUG: No rows to append for the batches fetched this session.");
  }

  // Record progress last, so it never runs ahead of what is actually in the sheet.
  scriptProperties.setProperty("lastBatchNumProcessed", String(lastBatchNumProcessed + batchCounter));
}
© www.soinside.com 2019 - 2024. All rights reserved.