https://www.cbf.com.br/futebol-brasileiro/competicoes/campeonato-brasileiro-serie-a/2018/1?ref=botao
我想从上面的页面获取市场文本。 (“Sábado,14 de Abril de 2018”和“16:00”)。
我用kotlin和jsoup库做了这个:
val date = select("div.col-sm-8 > span.text-2")[1] //Sábado, 14 de Abril de 2018
val time = select("div.col-sm-8 > span.text-2")[2] //16:00
这个查询div.col-sm-8 > span.text-2
返回一个数组,我简单地使用索引获取正确的信息。
但由于其他问题,我必须使用javascript。
我尝试使用JavaScript和Cherio库做同样的事情,但似乎它的工作方式不一样,即使两个搜索模式都基于JQuery:
const scherio = require('cheerio');
const rp = require('request-promise');
/**
* @type {string}
*/
const baseurl = "https://www.cbf.com.br/futebol-brasileiro/competicoes/campeonato-brasileiro-serie-a/2018/";
const turn = 190;
let totalGames = 1;
const gamesPerRound = 10;
module.exports =
class FetchRoundsFromCbf {
fetchRounds() {
for (let i = 1; i <= totalGames; i++) {
let url = baseurl.concat(i.toString());
rp(url).then(function (html) {
const $ = scherio.load(html);
let date = $("div.col-sm-8 > span.text-2")[1];
let time = $("div.col-sm-8 > span.text-2")[2];
console.log(date.text());
console.log(time.text());
});
}
}
}
给我:
Unhandled rejection TypeError: date.text is not a function
at /home/alexandre/dev/flutter/brasileiro-parser-js/network/fetchdata/FetchRoundsFromCbf.js:32:39
at tryCatcher (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/util.js:16:23)
at Promise._settlePromiseFromHandler (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/promise.js:512:31)
at Promise._settlePromise (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/promise.js:569:18)
at Promise._settlePromise0 (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/promise.js:614:10)
at Promise._settlePromises (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/promise.js:694:18)
at _drainQueueStep (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/async.js:138:12)
at _drainQueue (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/async.js:131:9)
at Async._drainQueues (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/async.js:147:5)
at Immediate.Async.drainQueues [as _onImmediate] (/home/alexandre/dev/flutter/brasileiro-parser-js/node_modules/bluebird/js/release/async.js:17:14)
at processImmediate (timers.js:637:19)
然后我只打印查询结果:
console.log(date);
console.log(time);
我收到:
{ type: 'tag',
name: 'span',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object: null prototype] { class: 'text-2 p-r-20' },
'x-attribsNamespace': [Object: null prototype] { class: undefined },
'x-attribsPrefix': [Object: null prototype] { class: undefined },
children:
[ { type: 'tag',
name: 'i',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [],
parent: [Circular],
prev: null,
next: [Object] },
{ type: 'text',
data: ' Sábado, 14 de Abril de 2018',
parent: [Circular],
prev: [Object],
next: null } ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object: null prototype] { class: 'col-sm-8' },
'x-attribsNamespace': [Object: null prototype] { class: undefined },
'x-attribsPrefix': [Object: null prototype] { class: undefined },
children:
[ [Object],
[Object],
[Object],
[Circular],
[Object],
[Object],
[Object] ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: null,
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Circular],
next: [Object] } },
prev:
{ type: 'text',
data: '\n ',
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'tag',
name: 'span',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Circular] },
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Object] },
prev: [Circular],
next:
{ type: 'tag',
name: 'span',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Circular],
next: [Object] } } }
{ type: 'tag',
name: 'span',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object: null prototype] { class: 'text-2 p-r-20' },
'x-attribsNamespace': [Object: null prototype] { class: undefined },
'x-attribsPrefix': [Object: null prototype] { class: undefined },
children:
[ { type: 'tag',
name: 'i',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [],
parent: [Circular],
prev: null,
next: [Object] },
{ type: 'text',
data: ' 16:00',
parent: [Circular],
prev: [Object],
next: null } ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object: null prototype] { class: 'col-sm-8' },
'x-attribsNamespace': [Object: null prototype] { class: undefined },
'x-attribsPrefix': [Object: null prototype] { class: undefined },
children:
[ [Object],
[Object],
[Object],
[Object],
[Object],
[Circular],
[Object] ],
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: null,
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent: [Object],
prev: [Circular],
next: [Object] } },
prev:
{ type: 'text',
data: '\n ',
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Object] },
prev:
{ type: 'tag',
name: 'span',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Circular] },
next: [Circular] },
next:
{ type: 'text',
data: '\n ',
parent:
{ type: 'tag',
name: 'div',
namespace: 'http://www.w3.org/1999/xhtml',
attribs: [Object],
'x-attribsNamespace': [Object],
'x-attribsPrefix': [Object],
children: [Array],
parent: [Object],
prev: [Object],
next: [Object] },
prev: [Circular],
next: null } }
我不是很擅长javascript,我该如何检索我需要的信息?
您可以使用eq()
通过索引the same way as in jQuery获取Cheerio元素。
let date = $("div.col-sm-8 > span.text-2").eq(1);
let time = $("div.col-sm-8 > span.text-2").eq(2);
eq()
将匹配元素集减少到指定索引处的元素集。
我使用切片管理了我想要的东西:
let date = $("div.col-sm-8").find("span").slice(1);
let time = $("div.col-sm-8").find("span").slice(2);
console.log(date.text());
console.log(time.text());