diff --git a/src/cli.ts b/src/cli.ts index c15da24..1632872 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -5,7 +5,7 @@ import fs from "fs"; import { getDomain } from "tldts"; import { Browser, launch } from "puppeteer"; import axios from "axios"; -import { Dobey, EP, LDJsonParser, Macrovet } from "./modules/websites"; +import { CoolBlue, Dobey, EP, LDJsonParser, Macrovet } from "./modules/websites"; import { createObjectCsvWriter } from "csv-writer"; import { CsvWriter } from "csv-writer/src/lib/csv-writer"; import { ObjectMap } from "csv-writer/src/lib/lang/object"; @@ -28,7 +28,9 @@ start(); async function debugStart() { browser = await launch({ headless: false }); - console.log(await crawlProductStock("https://www.hondenbed.nl/hondenkussen-taupe-bruin.html?utm_medium=affiliate&utm_source=tradetracker")); + console.log(await crawlProductStock("https://prf.hn/click/camref:1100leWsm/destination:https%3A%2F%2Fwww.coolblue.nl%2Fproduct%2F856992%2Fmio-cyclo-discover.html")); + // console.log(await crawlProductStock("https://www.coolblue.nl/product/923036/hp-deskjet-2720e-all-in-one.html?clickref=1101lwW9ebAE&utm_source=performancehorizon&utm_medium=affiliate&utm_campaign=Comparison%2FReview&utm_content=1101lwW9ebAE&utm_term=1100leWsm&ref=293530&PHGref=1101lwW9ebAE&cmt=c_ph%2Capm_Comparison%2FReview_%2Cacid1101l93%2Cacr_1100leWsm%2Caclr_1101lwW9ebAE")); + // console.log(await crawlProductStock("https://www.coolblue.nl/product/882996/canon-pixma-ts-3450-zwart.html?clickref=1011lwWDBFEL&utm_source=performancehorizon&utm_medium=affiliate&utm_campaign=Comparison%2FReview&utm_content=1011lwWDBFEL&utm_term=1100leWsm&ref=293530&PHGref=1011lwWDBFEL&cmt=c_ph%2Capm_Comparison%2FReview_%2Cacid1101l93%2Cacr_1100leWsm%2Caclr_1011lwWDBFEL")); } async function start() { @@ -134,7 +136,7 @@ async function crawlUrl(url: string) { await page.goto(url, { waitUntil: 'networkidle2' }); // Get html from page - const html = await page.evaluate(() => document.body.innerHTML); + const html = await page.evaluate(() => document.documentElement.outerHTML); // Close page await page.close(); @@ -202,6 +204,8 @@ async function crawlUrl(url: string) { in_stock: products[index].in_stock, blog_url: products[index].blog_url, }]); + + await wait(1000); } catch (error) { console.log(` [ERROR] ${products[index].product_name} - ${products[index].domain} - ${products[index].product_url}`, error); continue; @@ -219,13 +223,11 @@ async function crawlProductStock(url: string): Promise<[string, string, boolean] try { // Open new page and goto url const page = await browser.newPage(); + page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"); await page.goto(url, { waitUntil: 'networkidle2' }); // Get html from page - const html = await page.evaluate(() => document.body.innerHTML); - - // Load html in cheerio object - const $ = cheerio.load(html); + const html = await page.evaluate(() => document.documentElement.outerHTML); // Get domain from url const domain = getDomain(page.url()); @@ -253,7 +255,7 @@ async function crawlProductStock(url: string): Promise<[string, string, boolean] return [domain, page.url(), await LDJsonParser.check(html)]; case "coolblue.nl": - return [domain, page.url(), await LDJsonParser.check(html)]; + return [domain, page.url(), await CoolBlue.check(html)]; case "bcc.nl": return [domain, page.url(), await LDJsonParser.check(html)]; @@ -284,4 +286,8 @@ async function crawlProductStock(url: string): Promise<[string, string, boolean] console.error(error); console.log("-- Error while trying to crawl page! Skipping..."); } +} + +function wait(ms: number) { + return new Promise(resolve => setTimeout(resolve, ms)); } \ No newline at end of file diff --git a/src/modules/websites.ts b/src/modules/websites.ts index 89efc73..8f2b295 100644 --- a/src/modules/websites.ts +++ b/src/modules/websites.ts @@ -21,6 +21,7 @@ export namespace LDJsonParser { const $ = cheerio.load(html); $('script[type="application/ld+json"]').each((index, element) => { snippets.push($(element).html()); + }); for (const snippet of snippets) { @@ -43,6 +44,38 @@ export namespace LDJsonParser { } } } catch (error) { + console.log(error); + continue; + } + } + + return false; + } catch (error) { + console.log(error); + console.error(`Error occured during stock check!`); + return false; + } + } +} + +export namespace CoolBlue { + export async function check(html: string) { + try { + const snippets: Array = []; + const $ = cheerio.load(html); + $('script[type="application/ld+json"]').each((index, element) => { + snippets.push($(element).html()); + }); + + for (const snippet of snippets) { + try { + if (snippet.includes(`"@type": "Product"`)) { + if (snippet.includes(`"availability": "https://schema.org/InStock"`)) { + return true; + } + } + } catch (error) { + console.log(error); continue; } }