From 67a7b1d5705e5431fbfb090e27cdd2e79782bae5 Mon Sep 17 00:00:00 2001 From: Nick Leeman Date: Wed, 21 Jun 2023 13:45:59 +0200 Subject: [PATCH] Added more webshop support, updated system to use csv writer. --- .gitignore | 2 + package-lock.json | 391 ++-------------------------------------- package.json | 1 + src/core.ts | 167 ++++++++--------- src/modules/websites.ts | 34 ++++ 5 files changed, 125 insertions(+), 470 deletions(-) diff --git a/.gitignore b/.gitignore index 6f92a01..8d331a4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # ---> Node # Logs +*.csv + logs *.log npm-debug.log* diff --git a/package-lock.json b/package-lock.json index 60f463d..37b33f2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,8 +11,8 @@ "dependencies": { "axios": "^1.4.0", "cheerio": "^1.0.0-rc.12", + "csv-writer": "^1.6.0", "fs-extra": "^11.1.1", - "got": "^13.0.0", "jsonfile": "^6.1.0", "puppeteer": "^20.7.2", "tldts": "^6.0.8", @@ -84,28 +84,6 @@ } } }, - "node_modules/@sindresorhus/is": { - "version": "5.4.1", - "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.4.1.tgz", - "integrity": "sha512-axlrvsHlHlFmKKMEg4VyvMzFr93JWJj4eIfXY1STVuO2fsImCa7ncaiG5gC8HKOX590AW5RtRsC41/B+OfrSqw==", - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sindresorhus/is?sponsor=1" - } - }, - "node_modules/@szmarczak/http-timer": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz", - "integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==", - "dependencies": { - "defer-to-connect": "^2.0.1" - }, - "engines": { - "node": ">=14.16" - } - }, "node_modules/@types/got": { "version": "9.6.12", "resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz", @@ -117,11 +95,6 @@ "form-data": "^2.5.0" } }, - "node_modules/@types/http-cache-semantics": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.1.tgz", - "integrity": "sha512-SZs7ekbP8CN0txVG2xVRH6EgKmEm31BOxA07vkFaETzZz1xh+cbt8BcI0slpymvwhx5dlFnQG2rTlPVQn+iRPQ==" - }, "node_modules/@types/jsonfile": { "version": "6.1.1", "resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz", @@ -329,31 +302,6 @@ "node": "*" } }, - "node_modules/cacheable-lookup": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz", - "integrity": "sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w==", - "engines": { - "node": ">=14.16" - } - }, - "node_modules/cacheable-request": { - "version": "10.2.10", - "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-10.2.10.tgz", - "integrity": "sha512-v6WB+Epm/qO4Hdlio/sfUn69r5Shgh39SsE9DSd4bIezP0mblOlObI+I0kUEM7J0JFc+I7pSeMeYaOYtX1N/VQ==", - "dependencies": { - "@types/http-cache-semantics": "^4.0.1", - "get-stream": "^6.0.1", - "http-cache-semantics": "^4.1.1", - "keyv": "^4.5.2", - "mimic-response": "^4.0.0", - "normalize-url": "^8.0.0", - "responselike": "^3.0.0" - }, - "engines": { - "node": ">=14.16" - } - }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -515,6 +463,11 @@ "url": "https://github.com/sponsors/fb55" } }, + "node_modules/csv-writer": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/csv-writer/-/csv-writer-1.6.0.tgz", + "integrity": "sha512-NOx7YDFWEsM/fTRAJjRpPp8t+MKRVvniAg9wQlUKx20MFrPs73WLJhFf5iteqrxNYnsy924K3Iroh3yNHeYd2g==" + }, "node_modules/data-uri-to-buffer": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz", @@ -539,44 +492,11 @@ } } }, - "node_modules/decompress-response": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", - "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", - "dependencies": { - "mimic-response": "^3.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/decompress-response/node_modules/mimic-response": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", - "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==" }, - "node_modules/defer-to-connect": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", - "integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==", - "engines": { - "node": ">=10" - } - }, "node_modules/degenerator": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz", @@ -831,14 +751,6 @@ "node": ">= 0.12" } }, - "node_modules/form-data-encoder": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-2.1.4.tgz", - "integrity": "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw==", - "engines": { - "node": ">= 14.17" - } - }, "node_modules/fs-constants": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", @@ -865,17 +777,6 @@ "node": "6.* || 8.* || >= 10.*" } }, - "node_modules/get-stream": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", - "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/get-uri": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz", @@ -919,30 +820,6 @@ "node": ">= 4.0.0" } }, - "node_modules/got": { - "version": "13.0.0", - "resolved": "https://registry.npmjs.org/got/-/got-13.0.0.tgz", - "integrity": "sha512-XfBk1CxOOScDcMr9O1yKkNaQyy865NbYs+F7dr4H0LZMVgCj2Le59k6PqbNHoL5ToeaEQUYh6c6yMfVcc6SJxA==", - "dependencies": { - "@sindresorhus/is": "^5.2.0", - "@szmarczak/http-timer": "^5.0.1", - "cacheable-lookup": "^7.0.0", - "cacheable-request": "^10.2.8", - "decompress-response": "^6.0.0", - "form-data-encoder": "^2.1.2", - "get-stream": "^6.0.1", - "http2-wrapper": "^2.1.10", - "lowercase-keys": "^3.0.0", - "p-cancelable": "^3.0.0", - "responselike": "^3.0.0" - }, - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sindresorhus/got?sponsor=1" - } - }, "node_modules/graceful-fs": { "version": "4.2.4", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz", @@ -974,11 +851,6 @@ "entities": "^4.4.0" } }, - "node_modules/http-cache-semantics": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz", - "integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==" - }, "node_modules/http-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", @@ -991,18 +863,6 @@ "node": ">= 14" } }, - "node_modules/http2-wrapper": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.0.tgz", - "integrity": "sha512-kZB0wxMo0sh1PehyjJUWRFEd99KC5TLjZ2cULC4f9iqJBAmKQQXEICjxl5iPJRwP40dpeHFqqhm7tYCvODpqpQ==", - "dependencies": { - "quick-lru": "^5.1.1", - "resolve-alpn": "^1.2.0" - }, - "engines": { - "node": ">=10.19.0" - } - }, "node_modules/https-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz", @@ -1088,11 +948,6 @@ "js-yaml": "bin/js-yaml.js" } }, - "node_modules/json-buffer": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", - "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==" - }, "node_modules/json-parse-even-better-errors": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", @@ -1107,14 +962,6 @@ "universalify": "^2.0.0" } }, - "node_modules/keyv": { - "version": "4.5.2", - "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.2.tgz", - "integrity": "sha512-5MHbFaKn8cNSmVW7BYnijeAVlE4cYA/SVkifVgrh7yotnfhKmjuXpDKjrABLnT0SfHWV21P8ow07OGfRrNDg8g==", - "dependencies": { - "json-buffer": "3.0.1" - } - }, "node_modules/levn": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", @@ -1132,17 +979,6 @@ "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" }, - "node_modules/lowercase-keys": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz", - "integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ==", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/lru-cache": { "version": "7.18.3", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", @@ -1170,17 +1006,6 @@ "node": ">= 0.6" } }, - "node_modules/mimic-response": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz", - "integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg==", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/mitt": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz", @@ -1223,17 +1048,6 @@ } } }, - "node_modules/normalize-url": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.0.0.tgz", - "integrity": "sha512-uVFpKhj5MheNBJRTiMZ9pE/7hD1QTeEvugSJW/OmLzAp78PB5O6adfMNTvmfKhXBkvCzC+rqifWcVYpGFwTjnw==", - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/nth-check": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", @@ -1269,14 +1083,6 @@ "node": ">= 0.8.0" } }, - "node_modules/p-cancelable": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz", - "integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw==", - "engines": { - "node": ">=12.20" - } - }, "node_modules/pac-proxy-agent": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz", @@ -1457,17 +1263,6 @@ } } }, - "node_modules/quick-lru": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", - "integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==", - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/readable-stream": { "version": "3.6.2", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", @@ -1489,11 +1284,6 @@ "node": ">=0.10.0" } }, - "node_modules/resolve-alpn": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz", - "integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g==" - }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", @@ -1502,20 +1292,6 @@ "node": ">=4" } }, - "node_modules/responselike": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz", - "integrity": "sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==", - "dependencies": { - "lowercase-keys": "^3.0.0" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -1937,19 +1713,6 @@ "yargs": "17.7.1" } }, - "@sindresorhus/is": { - "version": "5.4.1", - "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.4.1.tgz", - "integrity": "sha512-axlrvsHlHlFmKKMEg4VyvMzFr93JWJj4eIfXY1STVuO2fsImCa7ncaiG5gC8HKOX590AW5RtRsC41/B+OfrSqw==" - }, - "@szmarczak/http-timer": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz", - "integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==", - "requires": { - "defer-to-connect": "^2.0.1" - } - }, "@types/got": { "version": "9.6.12", "resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz", @@ -1961,11 +1724,6 @@ "form-data": "^2.5.0" } }, - "@types/http-cache-semantics": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.1.tgz", - "integrity": "sha512-SZs7ekbP8CN0txVG2xVRH6EgKmEm31BOxA07vkFaETzZz1xh+cbt8BcI0slpymvwhx5dlFnQG2rTlPVQn+iRPQ==" - }, "@types/jsonfile": { "version": "6.1.1", "resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz", @@ -2114,25 +1872,6 @@ "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==" }, - "cacheable-lookup": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz", - "integrity": "sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w==" - }, - "cacheable-request": { - "version": "10.2.10", - "resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-10.2.10.tgz", - "integrity": "sha512-v6WB+Epm/qO4Hdlio/sfUn69r5Shgh39SsE9DSd4bIezP0mblOlObI+I0kUEM7J0JFc+I7pSeMeYaOYtX1N/VQ==", - "requires": { - "@types/http-cache-semantics": "^4.0.1", - "get-stream": "^6.0.1", - "http-cache-semantics": "^4.1.1", - "keyv": "^4.5.2", - "mimic-response": "^4.0.0", - "normalize-url": "^8.0.0", - "responselike": "^3.0.0" - } - }, "callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -2255,6 +1994,11 @@ "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==" }, + "csv-writer": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/csv-writer/-/csv-writer-1.6.0.tgz", + "integrity": "sha512-NOx7YDFWEsM/fTRAJjRpPp8t+MKRVvniAg9wQlUKx20MFrPs73WLJhFf5iteqrxNYnsy924K3Iroh3yNHeYd2g==" + }, "data-uri-to-buffer": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz", @@ -2268,31 +2012,11 @@ "ms": "2.1.2" } }, - "decompress-response": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", - "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", - "requires": { - "mimic-response": "^3.1.0" - }, - "dependencies": { - "mimic-response": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", - "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==" - } - } - }, "deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==" }, - "defer-to-connect": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", - "integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==" - }, "degenerator": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz", @@ -2460,11 +2184,6 @@ "mime-types": "^2.1.12" } }, - "form-data-encoder": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-2.1.4.tgz", - "integrity": "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw==" - }, "fs-constants": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", @@ -2485,11 +2204,6 @@ "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==" }, - "get-stream": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", - "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==" - }, "get-uri": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz", @@ -2526,24 +2240,6 @@ } } }, - "got": { - "version": "13.0.0", - "resolved": "https://registry.npmjs.org/got/-/got-13.0.0.tgz", - "integrity": "sha512-XfBk1CxOOScDcMr9O1yKkNaQyy865NbYs+F7dr4H0LZMVgCj2Le59k6PqbNHoL5ToeaEQUYh6c6yMfVcc6SJxA==", - "requires": { - "@sindresorhus/is": "^5.2.0", - "@szmarczak/http-timer": "^5.0.1", - "cacheable-lookup": "^7.0.0", - "cacheable-request": "^10.2.8", - "decompress-response": "^6.0.0", - "form-data-encoder": "^2.1.2", - "get-stream": "^6.0.1", - "http2-wrapper": "^2.1.10", - "lowercase-keys": "^3.0.0", - "p-cancelable": "^3.0.0", - "responselike": "^3.0.0" - } - }, "graceful-fs": { "version": "4.2.4", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz", @@ -2565,11 +2261,6 @@ "entities": "^4.4.0" } }, - "http-cache-semantics": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz", - "integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==" - }, "http-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", @@ -2579,15 +2270,6 @@ "debug": "^4.3.4" } }, - "http2-wrapper": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.0.tgz", - "integrity": "sha512-kZB0wxMo0sh1PehyjJUWRFEd99KC5TLjZ2cULC4f9iqJBAmKQQXEICjxl5iPJRwP40dpeHFqqhm7tYCvODpqpQ==", - "requires": { - "quick-lru": "^5.1.1", - "resolve-alpn": "^1.2.0" - } - }, "https-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz", @@ -2644,11 +2326,6 @@ "argparse": "^2.0.1" } }, - "json-buffer": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", - "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==" - }, "json-parse-even-better-errors": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", @@ -2663,14 +2340,6 @@ "universalify": "^2.0.0" } }, - "keyv": { - "version": "4.5.2", - "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.2.tgz", - "integrity": "sha512-5MHbFaKn8cNSmVW7BYnijeAVlE4cYA/SVkifVgrh7yotnfhKmjuXpDKjrABLnT0SfHWV21P8ow07OGfRrNDg8g==", - "requires": { - "json-buffer": "3.0.1" - } - }, "levn": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", @@ -2685,11 +2354,6 @@ "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" }, - "lowercase-keys": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz", - "integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ==" - }, "lru-cache": { "version": "7.18.3", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", @@ -2708,11 +2372,6 @@ "mime-db": "1.52.0" } }, - "mimic-response": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz", - "integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg==" - }, "mitt": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz", @@ -2741,11 +2400,6 @@ "whatwg-url": "^5.0.0" } }, - "normalize-url": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.0.0.tgz", - "integrity": "sha512-uVFpKhj5MheNBJRTiMZ9pE/7hD1QTeEvugSJW/OmLzAp78PB5O6adfMNTvmfKhXBkvCzC+rqifWcVYpGFwTjnw==" - }, "nth-check": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", @@ -2775,11 +2429,6 @@ "word-wrap": "~1.2.3" } }, - "p-cancelable": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz", - "integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw==" - }, "pac-proxy-agent": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz", @@ -2912,11 +2561,6 @@ "ws": "8.13.0" } }, - "quick-lru": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", - "integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==" - }, "readable-stream": { "version": "3.6.2", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", @@ -2932,24 +2576,11 @@ "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==" }, - "resolve-alpn": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz", - "integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g==" - }, "resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==" }, - "responselike": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz", - "integrity": "sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==", - "requires": { - "lowercase-keys": "^3.0.0" - } - }, "safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", diff --git a/package.json b/package.json index 0e25229..e62724e 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "dependencies": { "axios": "^1.4.0", "cheerio": "^1.0.0-rc.12", + "csv-writer": "^1.6.0", "fs-extra": "^11.1.1", "jsonfile": "^6.1.0", "puppeteer": "^20.7.2", diff --git a/src/core.ts b/src/core.ts index ed03d2b..73b1731 100644 --- a/src/core.ts +++ b/src/core.ts @@ -5,28 +5,30 @@ import fs from "fs"; import { getDomain } from "tldts"; import { Browser, launch } from "puppeteer"; import axios from "axios"; -import { Dobey, LDJsonParser } from "./modules/websites"; +import { Dobey, EP, LDJsonParser, Macrovet } from "./modules/websites"; +import { createObjectCsvWriter } from "csv-writer"; +import { CsvWriter } from "csv-writer/src/lib/csv-writer"; +import { ObjectMap } from "csv-writer/src/lib/lang/object"; interface IProduct { - name: string; - blogUrl: string; - url: string; domain: string; - inStock: boolean; + product_name: string; + product_url: string; + in_stock: boolean; + blog_url: string; }; // Globals let browser: Browser = null; let sitemapUrl: string = ""; -let allProducts: Array = []; -let allBlogUrls: Array = []; +let writer: CsvWriter> = null; // debugStart(); start(); async function debugStart() { browser = await launch({ headless: false }); - console.log(await crawlProductStock("https://www.kabels.nl/nl_nl/ewent-ew3240-draadloze-multi-connect-muis-600-2400-dpi-zwart-34863350.html")); + console.log(await crawlProductStock("https://www.macrovet.nl/K-Othrine-7-5-SC-1-liter-Insectenbestrijdingsmiddel/202561")); } async function start() { @@ -61,7 +63,7 @@ async function start() { for (const sitemapUrl of sitemapUrls) { - console.log(`Crawling crawled sitemap: ${sitemapUrl}`); + console.log(`Crawling found sitemap: ${sitemapUrl}`); const sitemapRequest = await axios.get(sitemapUrl); const $ = cheerio.load(sitemapRequest.data, { xmlMode: true }); @@ -76,15 +78,30 @@ async function start() { console.log(`Crawled ${urls.length} urls from sitemaps.`); - console.log(`Starting crawler browser..`); browser = await launch({ headless: true }); + console.log(`Initialized puppeteer browser.`); + + const today: Date = new Date(); + const datecode: string = today.getFullYear().toString() + "-" + (today.getMonth() + 1).toString() + "-" + today.getDay().toString(); + const domain: string = getDomain(sitemapUrl); + const file = `${domain}_${datecode}_report.csv`; + writer = createObjectCsvWriter({ + path: file, + header: [ + {id: 'domain', title: 'WEBSHOP_DOMAIN'}, + {id: 'product_name', title: 'PRODUCT_NAME'}, + {id: 'product_url', title: 'PRODUCT_URL'}, + {id: 'in_stock', title: 'IN_STOCK'}, + {id: 'blog_url', title: 'BLOG_URL'} + ] + }); + console.log(`Initialized csv writer.`); console.log(`Initialization done.`); console.log(`------------------------------------- \n`); - for (let url of urls) { + for (const url of urls) { try { - allBlogUrls.push(url); await crawlUrl(url); } catch (error) { continue; @@ -92,9 +109,9 @@ async function start() { } await browser.close(); - await generateReport(); + console.log(`Finished crawling all urls. Saved report to ${file}`); } catch (error) { - console.error(`An Error Occured!`, error); + console.error(`A error occurred!`, error); } } @@ -115,21 +132,21 @@ async function crawlUrl(url: string) { // Load html in cheerio object const $ = cheerio.load(html); - let products: Array = []; + const products: Array = []; if ($(".row-products").length >= 1) { console.log(`- Detected ${$(".row-products").length} content egg row type products.`); $(".row-products .cegg-list-logo-title a").each((index, element) => { - let productUrl = $(element).attr("href"); - let name = $(element).html().trim(); + const productUrl = $(element).attr("href"); + const name = $(element).html().trim(); products.push({ - name: name, - blogUrl: url, - url: productUrl, domain: "", - inStock: false, + product_name: name, + product_url: productUrl, + blog_url: url, + in_stock: false, }); }); } @@ -142,47 +159,47 @@ async function crawlUrl(url: string) { let name = $(element).find("h2").first().html().trim(); products.push({ - name: name, - blogUrl: url, - url: productUrl, domain: "", - inStock: false, + product_name: name, + product_url: productUrl, + blog_url: url, + in_stock: false, }); }); } console.log("- Checking product stocks..."); - for (let index in products) { + for (const index in products) { try { - let status = await crawlProductStock(products[index].url); + const status = await crawlProductStock(products[index].product_url); products[index].domain = status[0]; - products[index].url = status[1]; - products[index].inStock = status[2]; + products[index].product_url = status[1]; + products[index].in_stock = status[2]; - if (products[index].inStock) { - console.log(` [IN STOCK] ${products[index].name} - ${products[index].domain}`); + if (products[index].in_stock) { + console.log(` [IN STOCK] ${products[index].product_name} - ${products[index].domain}`); } else { - console.log(` [OUT OF STOCK] ${products[index].name} - ${products[index].domain} - ${products[index].url}`); + console.log(` [OUT OF STOCK] ${products[index].product_name} - ${products[index].domain}`); } - allProducts.push(products[index]); + // Write to csv + await writer.writeRecords([products[index]]); } catch (error) { - console.log("- Skipping product..."); + console.log(` [ERROR] ${products[index].product_name} - ${products[index].domain} - ${products[index].product_url}`, error); continue; } } console.log(""); console.log(""); - } catch (error) { console.log("-- Error while trying to crawl page! Skipping..."); } } -async function crawlProductStock(url: string) { +async function crawlProductStock(url: string): Promise<[string, string, boolean]> { try { // Open new page and goto url const page = await browser.newPage(); @@ -218,7 +235,31 @@ async function crawlProductStock(url: string) { case "petsonline.nl": return [domain, page.url(), await LDJsonParser.check(html)]; + + case "coolblue.nl": + return [domain, page.url(), await LDJsonParser.check(html)]; + case "bcc.nl": + return [domain, page.url(), await LDJsonParser.check(html)]; + + case "azerty.nl": + return [domain, page.url(), await LDJsonParser.check(html)]; + + case "cameranu.nl": + return [domain, page.url(), await LDJsonParser.check(html)]; + + case "ep.nl": + return [domain, page.url(), await EP.check(html)]; + + case "alternate.nl": + return [domain, page.url(), await LDJsonParser.check(html)]; + + case "macrovet.nl": + return [domain, page.url(), await Macrovet.check(html)]; + + case "ezydog.nl": + return [domain, page.url(), await LDJsonParser.check(html)]; + default: console.error(`-- ${domain} is not an supported website! Cannot check stock!`); return [domain, page.url(), false]; @@ -227,58 +268,4 @@ async function crawlProductStock(url: string) { console.error(error); console.log("-- Error while trying to crawl page! Skipping..."); } -} - -async function generateReport() { - console.log("Generating report..."); - - let today: Date = new Date(); - let datecode: string = today.getFullYear().toString() + "-" + (today.getMonth() + 1).toString() + "-" + today.getDay().toString(); - let domain: string = getDomain(sitemapUrl); - let file = `${domain}_${datecode}_report.txt`; - - let totalProducts = 0; - let totalOutStock = 0; - for (let product of allProducts) { - totalProducts++; - - if (product.inStock == false) { - totalOutStock++; - } - } - - appendLn(file, `Content Egg Product Stock Crawler Report\n`); - appendLn(file, `Total Blog Urls: ${allBlogUrls.length}\n`); - appendLn(file, `Total Products: ${totalProducts}\n`); - appendLn(file, `Total Out of Stock: ${totalOutStock}\n`); - appendLn(file, ``); - - let lastBlogUrl = ""; - for (let product of allProducts) { - if (product.blogUrl != lastBlogUrl) { - appendLn(file, `\n\n`); - appendLn(file, `${product.blogUrl}\n`); - appendLn(file, `----------------------------------\n`); - } - - if (product.inStock == false) { - appendLn(file, `${product.domain} - ${product.name} - ${product.url}\n`); - } - - lastBlogUrl = product.blogUrl; - } - - console.log("Report generated."); -} - -function appendLn(file: string, text: string): Promise { - return new Promise((resolve, reject) => { - fs.appendFile(file, text + "\n", (err) => { - if (err) { - reject(err); - } else { - resolve(); - } - }); - }); } \ No newline at end of file diff --git a/src/modules/websites.ts b/src/modules/websites.ts index 2194d1d..367759a 100644 --- a/src/modules/websites.ts +++ b/src/modules/websites.ts @@ -75,3 +75,37 @@ export namespace Dobey { } } +export namespace EP { + export async function check(html: string) { + try { + const $ = cheerio.load(html); + if ($(".product__info--stock_row").first().find("p").hasClass("is-green")) { + return true; + } else { + return false; + } + } catch (error) { + console.log(error); + console.error(`Error occured during stock check!`); + return false; + } + } +} + + +export namespace Macrovet { + export async function check(html: string) { + try { + const $ = cheerio.load(html); + if ($(".product-detail-price-container .product-available-stock").first().text().includes("0 in voorraad")) { + return false; + } else { + return true; + } + } catch (error) { + console.log(error); + console.error(`Error occured during stock check!`); + return false; + } + } +}