From b617ac85fe279fdb341e372158a73118c93e0315 Mon Sep 17 00:00:00 2001 From: Nick Leeman Date: Fri, 20 Nov 2020 19:51:19 +0100 Subject: [PATCH] added report system --- package-lock.json | 65 +++++++++++++++++++++++++++++++++++++++-- package.json | 8 +++-- report.txt | 15 ++++++++++ src/core.ts | 57 ++++++++++++++++++++++++++++++++---- src/modules/websites.ts | 7 ++++- 5 files changed, 142 insertions(+), 10 deletions(-) create mode 100644 report.txt diff --git a/package-lock.json b/package-lock.json index bb590d7..6e47023 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,15 @@ "@types/node": "*" } }, + "@types/fs-extra": { + "version": "9.0.4", + "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-9.0.4.tgz", + "integrity": "sha512-50GO5ez44lxK5MDH90DYHFFfqxH7+fTqEEnvguQRzJ/tY9qFrMSHLiYHite+F3SNmf7+LHC1eMXojuD+E3Qcyg==", + "dev": true, + "requires": { + "@types/node": "*" + } + }, "@types/jsonfile": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.0.0.tgz", @@ -36,6 +45,15 @@ "@types/node": "*" } }, + "@types/write": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@types/write/-/write-2.0.0.tgz", + "integrity": "sha512-SYY3QVxIxwyQo2Esqw1UxeQO2PwIRTNyCXCXS+fT1L+0SkIi0ahUiR3FNYOvST5nzfNG11d/gm49Yks/4PS60w==", + "dev": true, + "requires": { + "@types/node": "*" + } + }, "@types/yauzl": { "version": "2.9.1", "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.9.1.tgz", @@ -45,11 +63,24 @@ "@types/node": "*" } }, + "add-filename-increment": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/add-filename-increment/-/add-filename-increment-1.0.0.tgz", + "integrity": "sha512-pFV8VZX8jxuVMIycKvGZkWF/ihnUubu9lbQVnOnZWp7noVxbKQTNj7zG2y9fXdPcuZ6lAN3Drr517HaivGCjdQ==", + "requires": { + "strip-filename-increment": "^2.0.1" + } + }, "agent-base": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz", "integrity": "sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==" }, + "at-least-node": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/at-least-node/-/at-least-node-1.0.0.tgz", + "integrity": "sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg==" + }, "balanced-match": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", @@ -227,6 +258,24 @@ "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==" }, + "fs-extra": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-9.0.1.tgz", + "integrity": "sha512-h2iAoN838FqAFJY2/qVpzFXy+EBxfVE220PalAqQLDVsFOHLJrZvut5puAbCdNv6WJk+B8ihI+k0c7JK5erwqQ==", + "requires": { + "at-least-node": "^1.0.0", + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^1.0.0" + }, + "dependencies": { + "universalify": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-1.0.0.tgz", + "integrity": "sha512-rb6X1W158d7pRQBg5gkR8uPaSfiids68LTJQYOtEUhoJUWBdaQHsuT/EUduxXYxcrt4r5PJ4fuHW1MHT6p0qug==" + } + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -256,8 +305,7 @@ "graceful-fs": { "version": "4.2.4", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz", - "integrity": "sha512-WjKPNJF79dtJAVniUlGGWHYGz2jWxT6VhN/4m1NdkbZ2nOsEF+cI1Edgql5zCRhs/VsQYRvrXctxktVXZUkixw==", - "optional": true + "integrity": "sha512-WjKPNJF79dtJAVniUlGGWHYGz2jWxT6VhN/4m1NdkbZ2nOsEF+cI1Edgql5zCRhs/VsQYRvrXctxktVXZUkixw==" }, "htmlparser2": { "version": "3.10.1", @@ -482,6 +530,11 @@ "safe-buffer": "~5.2.0" } }, + "strip-filename-increment": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-filename-increment/-/strip-filename-increment-2.0.1.tgz", + "integrity": "sha512-+v5xsiTTsdYqkPj7qz1zlngIsjZedhHDi3xp/9bMurV8kXe9DAr732gNVqtt4X8sI3hOqS3nlFfps5gyVcux6w==" + }, "tar-fs": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.1.tgz", @@ -552,6 +605,14 @@ "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" }, + "write": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/write/-/write-2.0.0.tgz", + "integrity": "sha512-yam9TAqN8sAZokECAejo9HpT2j2s39OgK8i8yxadrFBVo+iSWLfnipRVFulfAw1d2dz5vSuGmlMHYRKG4fysOA==", + "requires": { + "add-filename-increment": "^1.0.0" + } + }, "ws": { "version": "7.4.0", "resolved": "https://registry.npmjs.org/ws/-/ws-7.4.0.tgz", diff --git a/package.json b/package.json index 07d8c05..3f4fe18 100644 --- a/package.json +++ b/package.json @@ -14,14 +14,18 @@ "license": "ISC", "dependencies": { "cheerio": "^1.0.0-rc.3", + "fs-extra": "^9.0.1", "jsonfile": "^6.1.0", "puppeteer": "^5.4.1", "tldts": "^5.6.71", - "typescript": "^4.0.5" + "typescript": "^4.0.5", + "write": "^2.0.0" }, "devDependencies": { "@types/cheerio": "^0.22.22", + "@types/fs-extra": "^9.0.4", "@types/jsonfile": "^6.0.0", - "@types/puppeteer": "^5.4.0" + "@types/puppeteer": "^5.4.0", + "@types/write": "^2.0.0" } } diff --git a/report.txt b/report.txt new file mode 100644 index 0000000..3153e69 --- /dev/null +++ b/report.txt @@ -0,0 +1,15 @@ +---------------------------------------- +Content Egg Product Stock Crawler Report +Total Blog Urls: 1 +Total Products: 7 +Total Out of Stock: 5 +---------------------------------------- + + +https://sportschoolplus.nl/cable-crossover-machines/ +---------------------------------- +Body-Solid GCCO150 Cable Crossover 2 x 75 kg - https://www.fitnesskoerier.nl/body-solid-gcco150-cable-crossover-2-x-75-kg.html?source=tradetracker +Body-Solid GCCO150 Cable Crossover - https://www.fitnessapparaat.nl/artikel/6438/body-solid-gcco150-cable-crossover.html?utm_source=TradeTracker&utm_medium=Affiliate&utm_campaign=Sportschoolplus +Body-Solid GDCC200 Functional Training Center - Cable Crossover - https://www.fitnessapparaat.nl/artikel/6781/body-solid-gdcc200-functional-training-center-cable-crossover.html?utm_source=TradeTracker&utm_medium=Affiliate&utm_campaign=Sportschoolplus +Body-Solid GDCC250 Deluxe Cable Crossover - https://www.fitnessapparaat.nl/artikel/6564/body-solid-gdcc250-deluxe-cable-crossover.html?utm_source=TradeTracker&utm_medium=Affiliate&utm_campaign=Sportschoolplus +Best Fitness BFFT10 Functional Trainer - https://www.fitnesskoerier.nl/best-fitness-bfft10-functional-trainer.html?source=tradetracker diff --git a/src/core.ts b/src/core.ts index 2e6e0ef..a1c008e 100644 --- a/src/core.ts +++ b/src/core.ts @@ -1,6 +1,7 @@ -import puppeteer from "puppeteer"; +import puppeteer, { product } from "puppeteer"; import jsonfile from "jsonfile"; import cheerio from "cheerio"; +import fs from "fs-extra"; import { getDomain } from "tldts"; // Import website modules @@ -13,6 +14,8 @@ start(); // Globals let browser: puppeteer.Browser = null; +let allProducts: Array = []; +let allBlogUrls: Array = []; async function start() { try { @@ -28,9 +31,12 @@ async function start() { console.log(`------------------------------------- \n`); for (let url of urls) { + allBlogUrls.push(url); await crawlBlogPage(url); } + await browser.close(); + await generateReport(); } catch (error) { console.error(`An Error Occured!`, error); } @@ -58,13 +64,13 @@ async function crawlBlogPage(url: string) { console.log(`Detected ${$(".row-products").length} content egg products!`); $(".row-products .cegg-list-logo-title a").each((index, element) => { - let url = $(element).attr("href"); + let productUrl = $(element).attr("href"); let name = $(element).html().trim(); products.push({ name: name, blogUrl: url, - url: "", + url: productUrl, domain: "", inStock: false, }); @@ -73,7 +79,7 @@ async function crawlBlogPage(url: string) { console.log("Checking product stocks..."); for (let index in products) { - let status = await crawlProductStock(products[index].blogUrl); + let status = await crawlProductStock(products[index].url); products[index].domain = status[0]; products[index].url = status[1]; @@ -84,6 +90,8 @@ async function crawlBlogPage(url: string) { } else { console.log(`❌ ${products[index].name} - ${products[index].domain}`); } + + allProducts.push(products[index]); } } catch (error) { @@ -125,5 +133,44 @@ async function crawlProductStock(url: string) { console.log("Error while trying to crawl page! Skipping..."); } } - + +async function generateReport() { + console.log("Generating report..."); + + let file = "./report.txt"; + let totalProducts = 0; + let totalOutStock = 0; + for (let product of allProducts) { + totalProducts++; + + if (product.inStock == false) { + totalOutStock++; + } + } + + await fs.appendFile(file, `----------------------------------------\n`); + await fs.appendFile(file, `Content Egg Product Stock Crawler Report\n`); + await fs.appendFile(file, `Total Blog Urls: ${allBlogUrls.length}\n`); + await fs.appendFile(file, `Total Products: ${totalProducts}\n`); + await fs.appendFile(file, `Total Out of Stock: ${totalOutStock}\n`); + await fs.appendFile(file, `----------------------------------------\n`); + await fs.appendFile(file, ``); + + let lastBlogUrl = ""; + for (let product of allProducts) { + if (product.blogUrl != lastBlogUrl) { + await fs.appendFile(file, `\n\n`); + await fs.appendFile(file, `${product.blogUrl}\n`); + await fs.appendFile(file, `----------------------------------\n`); + } + + if (product.inStock == false) { + await fs.appendFile(file, `${product.domain} - ${product.name} - ${product.url}\n`); + } + + lastBlogUrl = product.blogUrl; + } + + console.log("Report generated!"); +} diff --git a/src/modules/websites.ts b/src/modules/websites.ts index e08f731..0cf6f21 100644 --- a/src/modules/websites.ts +++ b/src/modules/websites.ts @@ -25,6 +25,7 @@ export namespace FitnessKoerier { return true; } } catch (error) { + console.log(error); console.error(`Error occured during stock check!`); return false; } @@ -36,7 +37,11 @@ export namespace FitnessApparaat { export async function check(html: string) { try { const $ = cheerio.load(html); - console.log($('[itemprop="offers"').find(".stock-red").length); + if ($('[itemprop="offers"]').find(".stock-red").length >= 1) { + return false; + } else { + return true; + } } catch (error) { console.log(error); console.error(`Error occured during stock check!`);