So, this post is a continuation of a post I made 10 months ago. I will reference it several times here, so here is a link back to it: Importing large amounts of API data
To start, I have been working on a database that pulls data from thousands of API/JSON links to build out its data. After many individual attempts that left the code erroring, timing out, or outright failing, I made a post looking for answers, and that is how I ended up using "batching". Here is the code that was provided, with some small adjustments to fit my data (none of them to the "batching" portion), and it worked:
var scriptProperties = PropertiesService.getScriptProperties();
function dataImport() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName("Import1");
var exportSheet = ss.getSheetByName("Data1");
var reqs = sheet.getRange("A2:A" + sheet.getLastRow()).getDisplayValues().reduce((ar, [url]) => {
if (url) {
ar.push({ url, muteHttpExceptions: true });
}
return ar;
}, []);
//Storage of current data
var bucket = [];
var batchSize = 200;
var batches = batchArray(reqs, batchSize);
var startingBatch = scriptProperties.getProperty("batchNumber") == null ? 0 : parseInt(scriptProperties.getProperty("batchNumber"));
var processedBatches = scriptProperties.getProperty("processedBatches") == null ? 0 : parseInt(scriptProperties.getProperty("processedBatches"));
console.log(`Total: ${reqs.length}.\n${batches.length} batches.`)
if (processedBatches >= (batches.length - 1)) {
console.log('All data has been processed already.');
} else {
//Start from the last batch that stopped and still needs to be processed.
for (let i = startingBatch; i < batches.length; i++) {
console.log(`Processing batch index #${parseInt(i)}`);
try {
var responses = UrlFetchApp.fetchAll(batches[i]);
bucket.push(responses);
//Remove previous batch index number
scriptProperties.deleteProperty("processedBatches");
//Store latest successful batch index number
scriptProperties.setProperty("processedBatches", parseInt(i));
}
//Catch the last batch index number where it stopped due to URL fetch exception
catch (e) {
//Remove the old batch number to be replaced with new batch number.
scriptProperties.deleteProperty("batchNumber");
//Remember the last batch that encountered an error, to be processed again in the next call.
scriptProperties.setProperty("batchNumber", parseInt(i));
console.log(`Batch index #${parseInt(i)} stopped`);
break;
}
}
const initialRes = [].concat.apply([], bucket);
var temp = initialRes.reduce((ar, r) => {
if (r.getResponseCode() == 200) {
var { id, firstName, lastName, fullName, displayName, shortName, weight, height, position: { abbreviation }, team: { $ref }, birthPlace: { city }, birthPlace: { state, country }, experience: { years, displayValue }, jersey, active } = JSON.parse(r.getContentText());
ar.push([id, firstName, lastName, fullName, displayName, shortName, weight, height, abbreviation, $ref, city, state, country, years, displayValue, jersey, active]);
}
return ar;
}, []);
var res = [...temp];
//Add table headers
exportSheet.getLastRow() == 0 && exportSheet.appendRow(['IDs', 'First Name', 'Last Name', 'Full Name', 'Display Name', 'Short Name', 'Weight', 'Height', 'Position', 'Team URL', 'City', 'State', 'Country', 'Years', 'Class', 'jersey', 'Active']);
//Add table data
var result = () => {
return temp.length != 0 && exportSheet.getRange(exportSheet.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
}
result() && console.log(`Processed: ${res.length}.`);
}
}
//Function to chunk the request data based on batch sizes
function batchArray(arr, batchSize) {
var batches = [];
for (var i = 0; i < arr.length; i += batchSize) {
batches.push(arr.slice(i, i + batchSize));
}
return batches;
}
//Optional function to check the latest processed batch
function checkBatch() {
console.log(`${parseInt(scriptProperties.getProperty("processedBatches"))+1} batches have been processed.`);
}
This code was supposed to process and then apply each batch individually to the "Import Data" sheet. As I ran the code repeatedly, it would increment the batchNumber and processedBatches Script Properties. I would add a trigger in Apps Script to run the code every minute until all the batches had run. By saving the properties, the goal was essentially to save the batching's work in progress and avoid the code timing out. When it was working, each run of the code took roughly 10-15 seconds, and the every-minute trigger would then apply all the batches. The answer on that post goes into more detail on how it works, including a GIF demo, so if you want the details, it explains it better than I can!
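For context, this is roughly the trigger setup (a minimal sketch; I created mine through the Apps Script UI, and the programmatic equivalent below is just for illustration):
// Sketch only: the programmatic equivalent of the every-minute trigger
// I set up through the Apps Script UI to keep calling dataImport().
function createMinuteTrigger() {
  ScriptApp.newTrigger("dataImport")
    .timeBased()
    .everyMinutes(1)
    .create();
}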
Fast-forward to now: I wanted to update my data with the latest information, and I tried to use the same code I had used before. To my frustration, the "batching" that previously worked is not functioning correctly. It is hard to describe it as anything other than inconsistent, because it has done several different things without my making any real changes to the code. Sometimes it gets stuck in a processing loop: it keeps executing the "try" part of the try/catch, listing batch after batch as "Processing" without actually applying any data. That continues until the code itself times out, and only the processedBatches property gets updated, never the batchNumber property. This is the outcome I see most often. Other times I have seen it process whichever batch it is on and then stop, inserting no data after just a single run. And on rare occasions it does put one batch into the sheet, but I cannot get it to do that twice in a row; usually, when a batch does make it into the sheet, the next run gets stuck in the processing loop again. Between attempts I have tried several things to reset the code, including setting both properties to zero and deleting the properties entirely, but none of it has produced a successful run. For extra reference: when the code was running correctly, I never needed to adjust the properties manually until all the batches had been applied, at which point I would reset them; nothing in my current attempts has come close to that.
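For reference, this is the kind of manual reset I have been attempting between runs (a sketch; the property names match the code below):
// Sketch of the manual resets I have been trying between failed runs.
function zeroProperties() {
  var props = PropertiesService.getScriptProperties();
  props.setProperty("batchNumber", "0");
  props.setProperty("processedBatches", "0");
}
function deleteProperties() {
  var props = PropertiesService.getScriptProperties();
  props.deleteProperty("batchNumber");
  props.deleteProperty("processedBatches");
}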
I am not sure where to go from here to troubleshoot this. I was already out of my depth when I pieced this together, and now I have no idea why it stopped working. I did add extra console.log statements to the code to try to pinpoint where the error might be occurring, which is how I discovered the inconsistent "catch" behavior.
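One thing worth noting: the catch block never logs the exception itself, so I still cannot see why fetchAll failed. Here is a small sketch of the same try/catch with the error made visible (e.message is a standard Error property; this logging is my own addition, not part of the original answer):
try {
  var responses = UrlFetchApp.fetchAll(batches[i]);
  bucket.push(responses);
} catch (e) {
  // Log the actual exception so the failure reason is visible.
  console.log(`Batch index #${i} stopped: ${e.message}`);
  scriptProperties.setProperty("batchNumber", parseInt(i));
  break;
}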
For reference, here is the latest version of the code I have been using, to show that (as far as I can tell) most of the internals have not been changed. In fact, I even went back and re-pasted the "batching" portion into the code to try to get it working:
var scriptProperties = PropertiesService.getScriptProperties();
function dataImport() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName("Base JSON Import");
var exportSheet = ss.getSheetByName("Import Data");
var reqs = sheet.getRange("A2:A" + sheet.getLastRow()).getDisplayValues().reduce((ar, [url]) => {
if (url) {
ar.push({ url, muteHttpExceptions: true });
}
return ar;
}, []);
//Storage of current data
var bucket = [];
var batchSize = 200;
var batches = batchArray(reqs, batchSize);
var startingBatch = scriptProperties.getProperty("batchNumber") == null ? 0 : parseInt(scriptProperties.getProperty("batchNumber"));
var processedBatches = scriptProperties.getProperty("processedBatches") == null ? 0 : parseInt(scriptProperties.getProperty("processedBatches"));
console.log(`Total: ${reqs.length}.\n${batches.length} batches.`)
if (processedBatches >= (batches.length - 1)) {
console.log('All data has been processed already.');
} else {
//Start from the last batch that stopped and still needs to be processed.
for (let i = startingBatch; i < batches.length; i++) {
console.log(`Processing batch index #${parseInt(i)}`);
try {
var responses = UrlFetchApp.fetchAll(batches[i]);
bucket.push(responses);
console.log(`Fetched batches, pushed responses`);
//Remove previous batch index number
scriptProperties.deleteProperty("processedBatches");
console.log(`Removed previous batch index number`);
//Store latest successful batch index number
scriptProperties.setProperty("processedBatches", parseInt(i));
console.log(`Stored latest successful batch index number`);
}
//Catch the last batch index number where it stopped due to URL fetch exception
catch (e) {
console.log(`Catching the last batch index number`);
//Remove the old batch number to be replaced with new batch number.
scriptProperties.deleteProperty("batchNumber");
console.log(`Removed old batch number`);
//Remember the last batch that encountered an error, to be processed again in the next call.
scriptProperties.setProperty("batchNumber", parseInt(i));
console.log(`Remembered the last batch number`);
console.log(`Batch index #${parseInt(i)} stopped`);
break;
}
}
const initialRes = [].concat.apply([], bucket);
var temp = initialRes.reduce((ar, r) => {
if (r.getResponseCode() == 200) {
var { id, firstName, lastName, fullName, displayName, shortName, weight, height, position, dateOfBirth, hand, jersey, debutYear, birthPlace, experience, active } = JSON.parse(r.getContentText());
var { abbreviation = null } = position || {};
var { displayValue = null } = hand || {};
var { city = null, state = null, country = null } = birthPlace || {};
var { years = null } = experience || {};
ar.push([id, firstName, lastName, fullName, displayName, shortName, weight, height, abbreviation, dateOfBirth, displayValue, jersey, debutYear, city, state, country, years, active]);
}
return ar;
}, []);
var res = [...temp];
//Add table headers
exportSheet.getLastRow() == 0 && exportSheet.appendRow(['IDs', 'First Name', 'Last Name', 'Full Name', 'Display Name', 'Short Name', 'Weight', 'Height', 'Position', 'DOB', 'Hand', 'Jersey', 'Debut Year', 'City', 'State', 'Country', 'Years', 'Active']);
//Add table data
var result = () => {
return temp.length != 0 && exportSheet.getRange(exportSheet.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
}
result() && console.log(`Processed: ${res.length}.`);
}
}
//Function to chunk the request data based on batch sizes
function batchArray(arr, batchSize) {
var batches = [];
for (var i = 0; i < arr.length; i += batchSize) {
batches.push(arr.slice(i, i + batchSize));
}
return batches;
}
//Optional function to check the latest processed batch
function checkBatch() {
console.log(`${parseInt(scriptProperties.getProperty("processedBatches"))+1} batches have been processed.`);
}
In my searching I have not found many good examples of importing from API links, and I still have not found anything that addresses the inconsistency that appears when pairing "batching" with try/catch. So now I am here asking for help. I have attached a test sheet in case that makes the problem easier to spot, and I would greatly appreciate any help I can get! Also, let me know if more details are needed or wanted.
Test sheet: https://docs.google.com/spreadsheets/d/1yb-AeTTv7Z5pJNsc13s90bmT3uIOHSovswVzLwEQVkM/edit?usp=sharing
You have a script that imports data from 12,000+ URLs; the script uses a sophisticated batching approach, but there is no mechanism to avoid timeout errors.
Consider this answer; it is very similar to the OP's code, but with several key differences:
- a single session variable, lastBatchNumProcessed, records how far the import has progressed
- batchesPerSession - limits the number of batches to process in each execution, and batchCounter - counts the batches processed so far
- the TRY code includes a new test of the number of batches processed:
if (batchCounter == batchesPerSession){
  break
}
If true, processing stops for this session; if false, processing continues.
- the CATCH code does not change the Script Properties; the code is simplified
- batchArray() is unchanged
var scriptProperties = PropertiesService.getScriptProperties();
function dataImport() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName("Base JSON Import");
var exportSheet = ss.getSheetByName("ImportData");
var reqs = sheet.getRange("A2:A" + sheet.getLastRow()).getDisplayValues().reduce((ar, [url]) => {
if (url) {
ar.push({ url, muteHttpExceptions: true });
}
return ar;
}, []);
//Storage of current data
var bucket = [];
var batchSize = 200;
var batches = batchArray(reqs, batchSize);
Logger.log("DEBUG: FYI-Number of records = "+reqs.length+", batch size = "+batchSize+", so target number of row in batches = "+batches.length)
// set a limit of batches to process to avoid timeout
var batchesPerSession = 10
var batchCounter = 0
// get session variable(s)
var lastBatchNumProcessed = scriptProperties.getProperty("lastBatchNumProcessed") == null ? 0 : parseInt(scriptProperties.getProperty("lastBatchNumProcessed"));
Logger.log("DEBUG: Last batch number processed = = "+lastBatchNumProcessed)
// test whether there is any processing required.
if (lastBatchNumProcessed >= (batches.length)) {
Logger.log('DEBUG: All data has been processed already.');
} else
{
//Start from the last batch that was processed.
for (let i = lastBatchNumProcessed; i < batches.length; i++) {
Logger.log("DEBUG: i:"+i+", batch#"+(+i+1)+", rows in batch = "+batches[i].length)
try {
// Logger.log("DEBUG: get responses using urlFetchApp")
var responses = UrlFetchApp.fetchAll(batches[i]);
// Logger.log("DEBUG: push the responses into the bucket")
bucket.push(responses);
Logger.log("DEBUG: Fetched responses for i="+i+" batch#"+parseInt(i+1)+" pushed responses onto bucket, number of values in the bucket = "+bucket.length)
//Update LastBatchNumProcessed
scriptProperties.setProperty("lastBatchNumProcessed", parseInt((+i+1)));
// Logger.log("DEBUG: Updated latest successful batch number = "+scriptProperties.getProperty("lastBatchNumProcessed"));
// count the batches processed and stop processing when the limit is reached
batchCounter = batchCounter+1
if (batchCounter == batchesPerSession){
// enough batches have been processed
// Logger.log("DEBUG: Batches processed this session = limit of "+batchesPerSession)
break
}
else{
// display number of batches processed.
// Logger.log("DEBUG: Batches processed this session = "+batchCounter)
}
}
//Catch the last batch index number where it stopped due to URL fetch exception
catch (e) {
Logger.log(`DEBUG: Catching error`);
Logger.log(`DEBUG: Catch: Batch index #${parseInt(i)} stopped`);
break;
}
}
// test whether any batches were processed this session
if (batchCounter !=0){
var ejbInitialRes = [].concat.apply([], bucket);
//Logger.log("i:"+i+", the length of initial Res = "+ejbInitialRes.length)
var temp = ejbInitialRes.reduce((ar, r) => {
if (r.getResponseCode() == 200) {
var { id, firstName, lastName, fullName, displayName, shortName, weight, height, position, dateOfBirth, hand, jersey, debutYear, birthPlace, experience, active } = JSON.parse(r.getContentText());
var { abbreviation = null } = position || {};
var { displayValue = null } = hand || {};
var { city = null, state = null, country = null } = birthPlace || {};
var { years = null } = experience || {};
ar.push([id, firstName, lastName, fullName, displayName, shortName, weight, height, abbreviation, dateOfBirth, displayValue, jersey, debutYear, city, state, country, years, active]);
}
return ar;
}, []);
var res = [...temp];
//Add table headers
exportSheet.getLastRow() == 0 && exportSheet.appendRow(['IDs', 'First Name', 'Last Name', 'Full Name', 'Display Name', 'Short Name', 'Weight', 'Height', 'Position', 'DOB', 'Hand', 'Jersey', 'Debut Year', 'City', 'State', 'Country', 'Years', 'Active']);
Logger.log("DEBUG: target range = "+exportSheet.getRange(exportSheet.getLastRow() + 1, 1, res.length, res[0].length).getA1Notation()+", Number of rows = "+res.length+", number of columns = "+res[0].length)
//Add table data
var result = () => {
return temp.length != 0 && exportSheet.getRange(exportSheet.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
}
result() && Logger.log(`DEBUG: number of records processed: ${res.length}.`); // append data and display message
}
else{
// there are no transactions to process
Logger.log("DEBUG: No Batches processed.")
}
} // end if else
}
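With batchesPerSession = 10 and batchSize = 200, each execution fetches at most 2,000 URLs and then exits cleanly, so the OP's existing every-minute trigger simply walks through the remaining batches on successive runs. When a fresh import is needed, only one property has to be cleared; a small helper sketch (the function name is mine, not part of the answer's code):
// Sketch: clear the single progress property so the next run of
// dataImport() starts again from batch 0.
function resetImportProgress() {
  PropertiesService.getScriptProperties().deleteProperty("lastBatchNumProcessed");
}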