更新网络爬虫

问题描述 投票:0回答:1

嗨,我终于能够设置我的webscraper并将数据导入我的网页:)

但是我的网页运行在端口3001上,而网页爬虫运行在端口8080上。我有点困惑:我该如何设置一个计时器,让爬虫在后台自动更新?

Index.js

// Entry point of the web page: serves the stored matches on port 3001.

// Loading the scraper script runs its top-level code; the scraper.js shown in
// this document starts its own HTTP server on port 8080 when loaded.
var scraper = require(__dirname + '/scripts/scraper.js');
var express = require('express');
var path = require('path');
var app = express();

var MongoClient = require('mongodb').MongoClient;
var url = "mongodb://127.0.0.1:27017/test";

app.use(express.static(__dirname + '/public'));

// set the view engine to ejs
app.set('view engine', 'ejs');

// index page: renders every document of the "customers" collection
app.get('/', function(req, res, next) {
  MongoClient.connect(url, function(err, db) {
    // Hand failures to Express instead of throwing: a throw inside this
    // callback would take the whole server down and leave the request open.
    if (err) {
      return next(err);
    }

    var dbo = db.db("mydb");
    dbo.collection("customers").find({}).toArray(function(err, result) {
      // Release the connection on both the success and the failure path.
      db.close();

      if (err) {
        return next(err);
      }

      res.render('pages/index', {
        result: result,
      });
    });
  });
});


app.listen(3001);
console.log('navigate to: http://178.62.253.206:3001');

目前的工作方式是:我需要手动访问 http://178.62.253.206:8080 才能更新网络爬虫的数据。

我希望爬虫能在后台静默地自动更新。

scraper.js

var http = require('http');
var request = require('request');
var cheerio = require('cheerio');
var MongoClient = require('mongodb').MongoClient

// Page that lists the matches to scrape.
const SCORES_URL = 'http://www.xscores.com/soccer';

/**
 * Comparator that orders match records by their display position `j`.
 *
 * @param {{j: number}} a
 * @param {{j: number}} b
 * @returns {number} negative, zero or positive, as Array.prototype.sort expects.
 */
function byPosition(a, b) {
  return a.j - b.j;
}

/**
 * Builds one match record from a cheerio-wrapped match row.
 *
 * Team, country and league names are lower-cased and then passed through
 * `ucwords` (team names additionally through `soccer`); the kick-off value is
 * passed through `subtracthour`.
 *
 * @param {object} $row - cheerio selection wrapping a single match row.
 * @param {number} position - display position stored on the record as `j`.
 * @returns {object} record with the keys hteam, ateam, j, statustype, country,
 *   league and Kickoff.
 */
function readMatchRow($row, position) {
  return {
    hteam: soccer(ucwords($row.attr('data-home-team').toLowerCase())),
    ateam: soccer(ucwords($row.attr('data-away-team').toLowerCase())),
    j: position,
    statustype: $row.attr('data-statustype'),
    country: ucwords($row.attr('data-country-name').toLowerCase()),
    league: ucwords($row.attr('data-league-name').toLowerCase()),
    Kickoff: subtracthour($row.attr('data-ko')),
  };
}

/** Row filter: every row is taken. */
function takeAll() {
  return 'take';
}

/**
 * Row filter: takes rows whose status is "sched" and stops at the first row
 * with any other status.
 * NOTE(review): stopping (rather than skipping) means scheduled rows that
 * come after the first non-scheduled row are never collected by this step.
 * This mirrors the previous `return false` inside `.each` -- confirm it is
 * intended.
 */
function takeWhileScheduled($row) {
  return $row.attr('data-statustype') !== 'sched' ? 'stop' : 'take';
}

/**
 * Row filter: takes rows that are not "sched" or that carry
 * data-ftr="true"; every other row is skipped.
 */
function takeFinished($row) {
  const isScheduled = $row.attr('data-statustype') === 'sched';
  const hasFullTimeFlag = $row.attr('data-ftr') === 'true';
  return !isScheduled || hasFullTimeFlag ? 'take' : 'skip';
}

/**
 * Appends one record per accepted row to `matches`.
 *
 * Accepted rows receive the positions startPosition, startPosition + 2, ...
 * so that an "even rows" pass and an "odd rows" pass interleave once the
 * records are sorted by position.
 *
 * @param {Function} $ - cheerio instance loaded with the page.
 * @param {string} selector - CSS selector of the rows to visit.
 * @param {number} startPosition - position given to the first accepted row.
 * @param {Function} decide - receives the wrapped row and returns 'take',
 *   'skip' or 'stop'.
 * @param {object[]} matches - array the records are pushed onto (mutated).
 */
function appendRows($, selector, startPosition, decide, matches) {
  let position = startPosition;

  $(selector).each(function (i, element) {
    const $row = $(element);
    const verdict = decide($row);

    if (verdict === 'take') {
      matches.push(readMatchRow($row, position));
      position += 2;
    }

    // Returning false from a cheerio `.each` callback ends the iteration.
    return verdict !== 'stop';
  });
}

/**
 * Collects live, scheduled and finished matches (in that order) from the
 * loaded page and returns them sorted by position.
 *
 * @param {Function} $ - cheerio instance loaded with the page.
 * @returns {object[]} match records sorted by `j`.
 */
function collectMatches($) {
  const matches = [];
  let evenStart;
  let oddStart;

  // Step 1: live matches. Even rows get 1, 3, 5, ...; odd rows get 2, 4, 6, ...
  appendRows($, 'div.match_line.score_row.live_match.e_true ', 1, takeAll, matches);
  appendRows($, 'div.match_line.score_row.live_match.o_true ', 2, takeAll, matches);
  matches.sort(byPosition);

  // Step 2: scheduled matches, numbered after the live ones.
  if (isEven(matches.length) === true) {
    evenStart = matches.length + 1;
    oddStart = matches.length + 2;
  } else {
    evenStart = matches.length + 2;
    oddStart = matches.length + 1;
  }
  appendRows($, 'div.match_line.score_row.other_match.e_true', evenStart, takeWhileScheduled, matches);
  appendRows($, 'div.match_line.score_row.other_match.o_true', oddStart, takeWhileScheduled, matches);
  matches.sort(byPosition);

  // Step 3: finished matches. The even/odd offsets are deliberately swapped
  // compared with step 2, exactly as in the previous implementation.
  if (isEven(matches.length) === true) {
    evenStart = matches.length + 2;
    oddStart = matches.length + 1;
  } else {
    evenStart = matches.length + 1;
    oddStart = matches.length + 2;
  }
  appendRows($, 'div.match_line.score_row.other_match.e_true', evenStart, takeFinished, matches);
  appendRows($, 'div.match_line.score_row.other_match.o_true', oddStart, takeFinished, matches);
  matches.sort(byPosition);

  return matches;
}

/**
 * Replaces the content of the "customers" collection with `matches`.
 *
 * The insert is started only after the drop has completed, so the two
 * operations can no longer race. A drop that fails because the collection
 * does not exist yet is not treated as an error, and an empty `matches`
 * array skips the insert.
 *
 * @param {object[]} matches - records to store.
 * @param {Function} done - called with an error, or with null on success.
 */
function storeMatches(matches, done) {
  MongoClient.connect('mongodb://127.0.0.1:27017/test', function (err, db) {
    if (err) {
      done(err);
      return;
    }

    const dbo = db.db("mydb");

    // Always release the connection before reporting the outcome.
    function finish(outcome) {
      db.close();
      done(outcome || null);
    }

    // Delete old records
    dbo.dropCollection("customers", function (dropErr, delOK) {
      // Code 26 / "ns not found" is how the server reports a missing
      // collection, which is expected on the very first run.
      const collectionMissing = Boolean(dropErr) &&
        (dropErr.code === 26 || dropErr.message === 'ns not found');

      if (dropErr && !collectionMissing) {
        finish(dropErr);
        return;
      }
      if (delOK) {
        console.log("Collection deleted");
      }
      if (matches.length === 0) {
        finish(null);
        return;
      }

      // Add new records
      dbo.collection("customers").insertMany(matches, function (insertErr) {
        if (insertErr) {
          finish(insertErr);
          return;
        }
        console.log("Collection created!");
        finish(null);
      });
    });
  });
}

// Every request to this server triggers one scrape-and-store cycle and is
// answered with a short plain-text status once the cycle has ended.
http.createServer(function (req, res) {
  function reply(statusCode, message) {
    res.writeHead(statusCode, { 'Content-Type': 'text/plain' });
    res.end(message);
  }

  request(SCORES_URL, function (error, response, html) {
    if (error || response.statusCode !== 200) {
      const reason = error ? error.message : `HTTP ${response.statusCode}`;
      console.error(`Scrape failed: could not fetch ${SCORES_URL} (${reason})`);
      reply(502, 'Scrape failed: could not fetch the source page');
      return;
    }

    let matches;
    try {
      matches = collectMatches(cheerio.load(html));
    } catch (parseErr) {
      // e.g. a row without one of the expected data-* attributes
      console.error('Scrape failed: could not read the match rows', parseErr);
      reply(500, 'Scrape failed: could not read the match rows');
      return;
    }

    storeMatches(matches, function (dbErr) {
      if (dbErr) {
        console.error('Scrape failed: could not store the matches', dbErr);
        reply(500, 'Scrape failed: could not store the matches');
        return;
      }
      reply(200, `Stored ${matches.length} matches`);
    });
  });
}).listen(8080);
console.log('Server is running at http://178.62.253.206:8080/');


/**
 * Tells whether a value is an even number.
 *
 * @param {*} n - value to inspect.
 * @returns {boolean|undefined} true when `n % 2` is falsy, false otherwise;
 *   undefined when `n` does not loosely equal `parseFloat(n)` (for example
 *   NaN or non-numeric text).
 */
function isEven(n) {
  // Loose comparison is intentional: numeric strings such as "4" qualify.
  if (n != parseFloat(n)) {
    return undefined;
  }

  const remainder = n % 2;
  return !remainder;
}
/**
 * Upper-cases the first character of the string and the first character
 * after every run of whitespace.
 *
 * Any character is handled, not only ASCII a-z, so names that start with a
 * non-ASCII letter (e.g. "örebro") are capitalised too. Characters without
 * an upper-case form (digits, punctuation) are left as they are.
 *
 * @param {*} str - value to convert; it is coerced to a string first.
 * @returns {string} the converted text.
 */
function ucwords (str) {
  return (str + '').replace(/^\S|\s+\S/g, function (match) {
    // `match` is the whole match (optional whitespace plus one character);
    // upper-casing it leaves the whitespace untouched.
    return match.toUpperCase();
  });
}

/**
 * Moves a clock value one hour back.
 *
 * The hour is read from characters 0-1 and the minutes from characters 3-4;
 * the result is always formatted as "HH:MM". An hour of "00" wraps around
 * to "23".
 *
 * @param {string} str - clock value that starts with a two-digit hour,
 *   e.g. "15:45".
 * @returns {*} the shifted "HH:MM" text, or `str` unchanged when it is not a
 *   string starting with two digits (e.g. undefined, "" or "TBA"), so a
 *   missing or malformed value neither throws nor produces "NaN:..." text.
 */
function subtracthour(str) {
  if (typeof str !== 'string' || !/^\d{2}/.test(str)) {
    return str;
  }

  const minutes = str.slice(3, 5);
  const hour = Number(str.slice(0, 2));

  // Midnight wraps to 23 instead of becoming -1.
  const shifted = hour === 0 ? 23 : hour - 1;
  const paddedHour = shifted <= 9 ? "0" + shifted : String(shifted);

  return paddedHour + ":" + minutes;
}
/**
 * Restores the upper-case spelling of an " Ff" suffix in a team name.
 *
 * Only the first occurrence of " Ff" is replaced with " FF".
 *
 * @param {string} str - team name.
 * @returns {string} the adjusted name.
 */
function soccer(str) {
  return str.replace(" Ff", " FF");
}

弗雷德里克

node.js web-scraping
1个回答
2
投票

我看到三个选择:

  • 轮询
  • Websockets [推荐]
  • IPC

Differences between websockets and long polling for turn based game server

WebSockets

在2018年,我建议使用websockets,因为它的CPU密集度更低,更简单。 Socket.io非常适合您的需求。

在服务器端,您可以每2秒向所有连接的客户端广播更新的webscraper数据。

// Time between two broadcasts, in milliseconds. The accompanying description
// says "every 2 seconds", so the interval is 2000 rather than 1000.
var BROADCAST_INTERVAL_MS = 2000;

// Periodically refreshes the scraper data and sends it to every connected
// socket.io client.
setInterval(function(){
    // Declared locally so the payload does not become an implicit global.
    var data = updateData();
    io.sockets.emit('webscraper data', data);
}, BROADCAST_INTERVAL_MS);

在客户端,您可以捕获该事件,并更新您的页面。

// Client-side handler for the 'webscraper data' event; `data` is the payload
// that was emitted with the event. The body below is a placeholder, not
// runnable code.
socket.on('webscraper data', function(data){
    $('#data').... // update the DOM here
});

这是一个使用socket.io的基本聊天教程的链接,它可以帮助您启动并运行:https://socket.io/get-started/chat/

长轮询

以下是使用 Express 和 EventEmitter 进行长轮询的示例:Node.js: Long-polling with EventEmitter and Express 4.x | Catching request close

© www.soinside.com 2019 - 2024. All rights reserved.