/**
 * Download one listing page and log the outcome.
 *
 * The page URL is built by appending `pageNumber` to the `PAGE` base. The
 * page is requested as HTML through `getArticles`, and the value that call
 * fulfills with is logged. A rejection from `getArticles` is logged with
 * `console.error` and is not re-thrown.
 *
 * @param {*} pageNumber - Value appended to `PAGE` and forwarded to
 *   `getArticles` (presumably a numeric page index; confirm against callers).
 * @returns {Promise<undefined>} Settles once the page has been handled.
 */
async function downloadArticle(pageNumber) {
  const url = PAGE + pageNumber;
  console.log("current page: " + url);

  // GET request that asks the server for HTML.
  const pageOptions = {
    url,
    method: "GET",
    headers: { "Accept": "text/html" },
  };

  try {
    const finishedPage = await getArticles(pageOptions, pageNumber);
    console.log('downloaded Page number: ' + finishedPage);
  } catch (error) {
    // Best effort: report the failure and let the caller carry on.
    console.error(error);
  }
}
async 用来表示函数是异步的:用它定义的函数会返回一个 Promise 对象,可以使用 then 方法添加回调函数。
而 await 可以理解为 async wait 的简写。await 必须出现在 async 函数内部,不能单独使用。我这里 await 后面跟的 getArticles,其实是一个用 Promise 包裹的 Node.js request 模块调用,源代码如下:
/**
 * Collect every <a> that is a direct child of a <div> which is itself a
 * direct child of an <li>, visiting <li> elements in document order.
 */
function collectListAnchors(document) {
  const anchors = [];
  for (const li of document.getElementsByTagName("li")) {
    for (const child of li.childNodes) {
      if (child.nodeName !== "DIV") continue;
      for (const grandChild of child.childNodes) {
        if (grandChild.nodeName === "A") anchors.push(grandChild);
      }
    }
  }
  return anchors;
}

/**
 * Request one listing page and record the article links found on it.
 *
 * The page is fetched with the `request` module (cookie jar enabled) and
 * parsed with JSDOM. Every `li > div > a` whose href contains "/p" is stored
 * in `mArticleResult` as title -> `PREFIX + href`. When a title is already in
 * `mArticleResult`, `lastPageReached` is set to true and parsing stops; the
 * remaining anchors on that page are not stored.
 *
 * @param {object} pageOptions - Options object handed to `request`.
 * @param {*} pageNumber - Value the returned promise fulfills with.
 * @returns {Promise<*>} Fulfills with `pageNumber` once the page has been
 *   processed. Rejects with the request error, or with whatever is thrown
 *   while parsing the response.
 */
function getArticles(pageOptions, pageNumber) {
  return new Promise((resolve, reject) => {
    const requestWithCookies = request.defaults({ jar: true });

    requestWithCookies(pageOptions, (error, response, body) => {
      if (error) {
        console.log("error: " + error);
        // Reject and stop: there is no body to parse, and the caller's
        // try/catch can only see the failure if the promise rejects.
        reject(error);
        return;
      }

      try {
        const { document } = new JSDOM(body).window;

        for (const anchor of collectListAnchors(document)) {
          const fragment = anchor.getAttribute("href");
          // getAttribute returns null for an <a> that has no href.
          if (fragment == null || !fragment.includes("/p")) continue;

          const title = anchor.text;
          const wholeURL = PREFIX + fragment;
          console.log("title: " + title);
          console.log("url: " + wholeURL);

          if (mArticleResult.has(title)) {
            // A title seen before is treated as the end of the listing.
            lastPageReached = true;
            console.log("article size: " + mArticleResult.size);
            resolve(pageNumber);
            return;
          }
          mArticleResult.set(title, wholeURL);
        }

        resolve(pageNumber);
      } catch (parseError) {
        // An exception escaping this callback would leave the promise
        // pending forever, so surface it as a rejection instead.
        reject(parseError);
      }
    });
  });
}
在nodejs里执行上面这段代码,输出: