1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
"use strict";

// Module-level setup for the NSTRS report scraper: third-party dependencies,
// the shared browser and database handles, and the crawl parameters.

// HTML parser used to read the markup of the results table.
const cheerio = require("cheerio");
// Browser automation; `show: true` runs it with a visible window.
const Nightmare = require('nightmare');
const nightmare = Nightmare({ show: true });
const Database = require('better-sqlite3');
// Generator runner that drives the `run` flow below.
const vo = require('vo');
// SQLite file that receives the scraped rows; `verbose` hands every executed
// SQL string to console.log.
const db = new Database('nstrs.db', { verbose: console.log });

// Entry page of the report navigation UI.
const myURL = "https://www.nstrs.cn/kjbg/navigation";
// Number of result pages to walk (pages are numbered from 1).
const PageNum = 17567;
/**
 * Converts the inner HTML of the results table into one record per data row.
 *
 * Cells are read from the row's raw `childNodes` at even indices (0, 2, 4, 6, 8);
 * presumably the odd indices are whitespace text nodes between the <td>
 * elements. Verify against the live markup if the site layout changes.
 *
 * @param {string} tableHtml - `innerHTML` of the `.GJKJBG2013_Table1` element.
 * @returns {Array<Object>} Records shaped for the `info` table insert; the
 *   abstract and keyword columns are left as empty strings.
 */
function parseReportRows(tableHtml) {
  // A string pattern makes String#replace rewrite only the FIRST occurrence,
  // so just the first "tbody" (presumably the opening tag) becomes "table";
  // the parser is left to cope with the unmatched closing tag.
  const $ = cheerio.load(tableHtml.replace('tbody', 'table'));

  // nextAll() keeps every <tr> that has an earlier sibling <tr>, i.e. it drops
  // the first row of the table (presumably the header row).
  const dataRows = $('html').find('tr').nextAll().toArray();

  return dataRows.map((row) => {
    const cells = row.childNodes;
    // The third child node holds the title cell with the link to the report.
    const link = $(cells[2]).find('a');
    return {
      id: $(cells[0]).text(),
      author: $(cells[4]).text(),
      organization: $(cells[6]).text(),
      title: link.text(),
      year: $(cells[8]).text(),
      url: `https://www.nstrs.cn/kjbg/${link.attr('href')}`,
      absctractcn: "",
      absctracten: "",
      keywordcn: "",
      keyworden: "",
    };
  });
}

/**
 * Walks result pages 1..PageNum of the navigation UI and stores every listed
 * report in the `info` table.
 *
 * For each page it writes the page number into `#pagevalue`, clicks `#search`,
 * waits until `#s3` shows that page number, then parses the table and inserts
 * its rows. The browser is shut down even when a step fails.
 *
 * Driven by `vo`; any thrown or rejected error is delivered to vo's callback.
 */
const run = function* () {
  // Prepared once and reused for every row instead of re-preparing per insert.
  const insertBasic = db.prepare('INSERT INTO info (id, author, organization, title, year, url,absctractcn, absctracten, keywordcn, keyworden) VALUES (@id, @author, @organization, @title, @year, @url, @absctractcn, @absctracten, @keywordcn, @keyworden)');

  try {
    yield nightmare.goto(myURL)
      .click('#a1')
      .wait(function () {
        // Ready once #s2 shows this exact text (presumably the total number of
        // records; it has to be updated if the site's total changes).
        return (document.getElementById("s2").innerText === "175670");
      });

    for (let page = 1; page <= PageNum; page++) {
      yield nightmare.evaluate(function (page) {
        document.getElementById("pagevalue").value = page;
      }, page);
      yield nightmare.click('#search');
      yield nightmare.wait(function (page) {
        // innerText is a string, so compare against the stringified page number.
        return (document.getElementById("s3").innerText === String(page));
      }, page);

      const tableHtml = yield nightmare.evaluate(() => document.querySelector('.GJKJBG2013_Table1').innerHTML);
      for (const info of parseReportRows(tableHtml)) {
        insertBasic.run(info);
      }
    }
  } finally {
    // Always release the browser process, including after a timeout or a
    // failed insert.
    yield nightmare.end();
  }
};

vo(run)(function (err) {
  if (err) {
    console.dir(err);
    // Let callers (shell scripts, schedulers) see that the crawl failed.
    process.exitCode = 1;
  }
  console.log('done');
});
|