我正在学习tensorflow.js,这可能是我尝试编写的第一个完整程序。然而我感觉有些不对劲。我的模型无法预测正确的天气,并且无论我如何训练它,它总是预测“雾霾”。以下是我正在运行的用于训练和预测的示例 NodeJS 程序。请帮我弄清楚这里出了什么问题:
// Install the necessary packages:
// npm install @tensorflow/tfjs-node
const _ = require('lodash'),
fs = require('fs'),
tf = require('@tensorflow/tfjs-node');
var data = require('./data2.json');
// Sample data (replace with your actual data)
const features = [];
const labels = [];
for (var i = 0; i < data.length; i++) {
features.push([data[i].temp, data[i].wind, data[i].humidity, data[i].pressure, data[i].visibility, yearToDateTime(data[i].date, data[i].time)]);
labels.push(data[i].weatherType);
}
var uniqLabels = _.uniq(labels);
(async() => {
// Create a simple neural network model
var model = null;
if (fs.existsSync('./kolkata-weather-model/model.json')) {
model = await tf.loadLayersModel('file://./kolkata-weather-model/model.json');
}
if (model == null) {
model = tf.sequential();
model.add(tf.layers.dense({ units: 1, activation: 'sigmoid', inputShape: [6] }));
model.add(tf.layers.dense({ units: uniqLabels.length, activation: 'sigmoid' }));
// Compile the model
model.compile({ optimizer: 'sgd', loss: 'sparseCategoricalCrossentropy', metrics: ['accuracy'] });
// Train the model
const xs = tf.tensor2d(features);
const ys = tf.tensor1d(_.map(labels, (o) => { return _.indexOf(uniqLabels, o); }));
model.fit(xs, ys, { epochs: 100 }).then(() => {
model.save('file://./kolkata-weather-model');
// Example prediction for tomorrow's weather
getTomorrowPrediction(model);
});
} else {
getTomorrowPrediction(model);
}
})();
function getTomorrowPrediction(model) {
const tomorrowFeatures = [10, 2, 35, 1008, 30, yearToDateTime('20240101', '15:30')]; // Replace with actual values
const prediction = model.predict(tf.tensor2d([tomorrowFeatures])).argMax(1).dataSync()[0];
console.log('Weather prediction for tomorrow:', uniqLabels[prediction]);
}
function yearToDateTime(dt, time) {
if (!_.isNil(dt) && dt.length == 8 && !_.isNil(time) && time.length == 5) {
var yr = dt.substring(0, 4);
var mon = dt.substring(4, 6);
var day = dt.substring(6);
var firstDayOfYr = Date.parse(yr + '-01-01 00:00:00.000');
var todayDate = Date.parse(yr + '-' + mon + '-' + day + ' ' + time + ':00.000');
return todayDate - firstDayOfYr;
}
return -1;
}
function getWeatherTypeName(id) {
if (id == 1) {
return 'Sunny';
} else if (id == 2) {
return 'Cloudy';
} else if (id == 3) {
return 'Rainy';
} else {
return 'Unknown';
}
}
function getWeatherType(weatherType) {
weatherType = weatherType
.replace('Cool', 1)
.replace('Mild', 1)
.replace('Clear', 1)
.replace('Fog', 2)
.replace('Haze', 1)
.replace('Passing clouds', 2)
.replace('Partly sunny', 1)
.replace('Scattered clouds', 2)
.replace('Drizzle Broken clouds.', 3)
.replace('Broken clouds', 2)
.replace('Light rain Partly cloudy.', 3)
.replace('Rain Partly cloudy.', 3)
.replace('Light rain Broken clouds.', 3)
.replace('Partly cloudy', 2)
.replace('Warm', 1)
.replace('Thunderstorms Partly cloudy.', 2)
.replace('Thunderstorms Passing clouds.', 2)
.replace('Thunderstorms Broken clouds.', 2)
.replace('Thunderstorms Partly sunny.', 2)
.replace('Light rain Passing clouds.', 3)
.replace('Light rain Scattered clouds.', 3)
.replace('Thunderstorms Scattered clouds.', 2)
.replace('Thunderstorms More clouds than sun.', 2)
.replace('More clouds than sun', 2)
.replace('Light rain More clouds than sun.', 3)
.replace('Hot', 1)
.replace('Strong thunderstorms Mostly cloudy.', 2)
.replace('Thunderstorms Mostly cloudy.', 2)
.replace('Light rain Mostly cloudy.', 3)
.replace('Drizzle More clouds than sun.', 3)
.replace('Drizzle Mostly cloudy.', 3)
.replace('Rain Broken clouds.', 3)
.replace('Rain Mostly cloudy.', 3)
.replace('Mostly cloudy', 2)
.replace('Rain More clouds than sun.', 3)
.replace('Thundershowers Broken clouds.', 2)
.replace('Sunny', 1)
.replace('', '-1')
.replace('Smoke', 2)
.replace('Thundershowers Partly cloudy.', 3)
.replace('Rain Cloudy.', 3)
.replace('Heavy rain More clouds than sun.', 3)
.replace('Heavy rain Mostly cloudy.', 3)
.replace('Strong thunderstorms More clouds than sun.', 2)
.replace('Extremely hot', 1)
.replace('Strong thunderstorms Broken clouds.', 2)
.replace('Rain Scattered clouds.', 3)
.replace('Heavy rain Broken clouds.', 3)
.replace('Light rain Partly sunny.', 3)
.replace('Light fog', 1)
.replace('Strong thunderstorms Partly cloudy.', 2)
.replace('Rain showers More clouds than sun.', 3)
.replace('Rain Partly sunny.', 3)
.replace('Strong thunderstorms Cloudy.', 2)
.replace('Thunderstorms Cloudy.', 2)
.replace('Rain Passing clouds.', 3)
.replace('Rain Overcast.', 3)
.replace('Light rain Overcast.', 3)
.replace('Heavy rain Cloudy.', 3)
.replace('Light rain Cloudy.', 3)
.replace('Hail Mostly cloudy.', 3)
.replace('Thunderstorms Overcast.', 2)
.replace('Overcast', 2)
.replace('Drizzle Cloudy.', 3)
.replace('Light rain Fog.', 3)
.replace('Strong thunderstorms Scattered clouds.', 2)
.replace('Cloudy', 2)
.replace('Strong thunderstorms Partly sunny.', 2)
.replace('Strong thunderstorms Passing clouds.', 2);
return parseInt(weatherType, 10);
}
我的数据样本是这样的,用于训练(data2.json):
[ {
"date": "20140101",
"time": "00:20",
"temp": 14,
"weatherType": "Cool",
"wind": null,
"humidity": 88,
"pressure": 1016,
"visibility": 0
},
{
"date": "20200720",
"time": "21:00",
"temp": 28,
"weatherType": "Passing clouds",
"wind": null,
"humidity": 94,
"pressure": 1003,
"visibility": 21
},
{
"date": "20150801",
"time": "05:50",
"temp": 25,
"weatherType": "Heavy rain More clouds than sun.",
"wind": 15,
"humidity": 100,
"pressure": 995,
"visibility": 5
},
...
]
在第 41 纪元之后,训练模型时 acc 和 loss 没有变化。而且我还觉得无论我提供什么输入,预测都保持不变。我正在使用加尔各答 10 年的天气数据来训练模型。对于任何输入,我当然期待比“雾霾”更好的预测。您能否帮助我正确选择一些内容,以便模型能够更准确地预测结果? - 使用多少个致密层?密集层的配置(激活、单位和形状)应该是什么?编译模型时使用哪些优化器和损失函数?
我认为这里的主要问题是神经网络的大小。日期和天气的相关性应该比较强,我们称之为气候。据我所知,对这种相关性的了解是古代农业的重要里程碑。今天和明天的天气之间的相关性也可能有些显着,但是有很多外部变量,这些变量无法通过单站测量获得(锋面系统、风携带的云......)。
您应该能够至少找到输入和输出之间的一些相关性,但您的网络没有空间来执行任何有用的数学运算。 当我查看你的网络时,你有 6 个输入(温度、风、湿度、压力、能见度和日期),然后是连接到这些输入的单个神经元,然后是一些输出神经元,每个神经元对应一种天气类型。这根本行不通。
单个人工神经元基本上只是一个非常简单的数学方程。另外,它还有称为权重的内部参数。权重是在训练期间调整的东西。每个输入一个,再加上一个阈值。神经元首先获取所有输入,将每个输入与其权重相乘,然后将它们相加。结果被传递给传递函数,在你的例子中是 sigmoid。然后将阈值权重添加到该结果中,并将其传递给激活函数,在您的情况下,只需默认线性激活 a(x) = x。单个神经元的输出是单个浮点值。
如您所见,您已将所有天气信息简化为单个浮点值,该浮点值无法容纳它。你试图用只有 7 个自由度的方程来描述所有天气的相关性。尝试尝试网络的形状和大小,也许您可以获得更好的结果。
我还会向您推荐这个tensorflow playgound,其中只有两个输入 X 和 Y,数据集是两种颜色的点。您可以添加或删除层、在每层上添加神经元、添加由某些非线性数学函数(例如 X 平方、sin(Y)...)预处理的数据。它不是人工智能最有用的应用程序,但它很漂亮且直观,非常适合学习基础知识。