Added more webshop support, updated system to use csv writer.

This commit is contained in:
Nick Leeman 2023-06-21 13:45:59 +02:00
parent 7c6025e5aa
commit 67a7b1d570
5 changed files with 125 additions and 470 deletions

2
.gitignore vendored
View File

@ -1,5 +1,7 @@
# ---> Node
# Logs
*.csv
logs
*.log
npm-debug.log*

391
package-lock.json generated
View File

@ -11,8 +11,8 @@
"dependencies": {
"axios": "^1.4.0",
"cheerio": "^1.0.0-rc.12",
"csv-writer": "^1.6.0",
"fs-extra": "^11.1.1",
"got": "^13.0.0",
"jsonfile": "^6.1.0",
"puppeteer": "^20.7.2",
"tldts": "^6.0.8",
@ -84,28 +84,6 @@
}
}
},
"node_modules/@sindresorhus/is": {
"version": "5.4.1",
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.4.1.tgz",
"integrity": "sha512-axlrvsHlHlFmKKMEg4VyvMzFr93JWJj4eIfXY1STVuO2fsImCa7ncaiG5gC8HKOX590AW5RtRsC41/B+OfrSqw==",
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sindresorhus/is?sponsor=1"
}
},
"node_modules/@szmarczak/http-timer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz",
"integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==",
"dependencies": {
"defer-to-connect": "^2.0.1"
},
"engines": {
"node": ">=14.16"
}
},
"node_modules/@types/got": {
"version": "9.6.12",
"resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz",
@ -117,11 +95,6 @@
"form-data": "^2.5.0"
}
},
"node_modules/@types/http-cache-semantics": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.1.tgz",
"integrity": "sha512-SZs7ekbP8CN0txVG2xVRH6EgKmEm31BOxA07vkFaETzZz1xh+cbt8BcI0slpymvwhx5dlFnQG2rTlPVQn+iRPQ=="
},
"node_modules/@types/jsonfile": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz",
@ -329,31 +302,6 @@
"node": "*"
}
},
"node_modules/cacheable-lookup": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz",
"integrity": "sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w==",
"engines": {
"node": ">=14.16"
}
},
"node_modules/cacheable-request": {
"version": "10.2.10",
"resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-10.2.10.tgz",
"integrity": "sha512-v6WB+Epm/qO4Hdlio/sfUn69r5Shgh39SsE9DSd4bIezP0mblOlObI+I0kUEM7J0JFc+I7pSeMeYaOYtX1N/VQ==",
"dependencies": {
"@types/http-cache-semantics": "^4.0.1",
"get-stream": "^6.0.1",
"http-cache-semantics": "^4.1.1",
"keyv": "^4.5.2",
"mimic-response": "^4.0.0",
"normalize-url": "^8.0.0",
"responselike": "^3.0.0"
},
"engines": {
"node": ">=14.16"
}
},
"node_modules/callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@ -515,6 +463,11 @@
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/csv-writer": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/csv-writer/-/csv-writer-1.6.0.tgz",
"integrity": "sha512-NOx7YDFWEsM/fTRAJjRpPp8t+MKRVvniAg9wQlUKx20MFrPs73WLJhFf5iteqrxNYnsy924K3Iroh3yNHeYd2g=="
},
"node_modules/data-uri-to-buffer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz",
@ -539,44 +492,11 @@
}
}
},
"node_modules/decompress-response": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
"dependencies": {
"mimic-response": "^3.1.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/decompress-response/node_modules/mimic-response": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/deep-is": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
"integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="
},
"node_modules/defer-to-connect": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz",
"integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==",
"engines": {
"node": ">=10"
}
},
"node_modules/degenerator": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz",
@ -831,14 +751,6 @@
"node": ">= 0.12"
}
},
"node_modules/form-data-encoder": {
"version": "2.1.4",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-2.1.4.tgz",
"integrity": "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw==",
"engines": {
"node": ">= 14.17"
}
},
"node_modules/fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
@ -865,17 +777,6 @@
"node": "6.* || 8.* || >= 10.*"
}
},
"node_modules/get-stream": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
"integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/get-uri": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz",
@ -919,30 +820,6 @@
"node": ">= 4.0.0"
}
},
"node_modules/got": {
"version": "13.0.0",
"resolved": "https://registry.npmjs.org/got/-/got-13.0.0.tgz",
"integrity": "sha512-XfBk1CxOOScDcMr9O1yKkNaQyy865NbYs+F7dr4H0LZMVgCj2Le59k6PqbNHoL5ToeaEQUYh6c6yMfVcc6SJxA==",
"dependencies": {
"@sindresorhus/is": "^5.2.0",
"@szmarczak/http-timer": "^5.0.1",
"cacheable-lookup": "^7.0.0",
"cacheable-request": "^10.2.8",
"decompress-response": "^6.0.0",
"form-data-encoder": "^2.1.2",
"get-stream": "^6.0.1",
"http2-wrapper": "^2.1.10",
"lowercase-keys": "^3.0.0",
"p-cancelable": "^3.0.0",
"responselike": "^3.0.0"
},
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sindresorhus/got?sponsor=1"
}
},
"node_modules/graceful-fs": {
"version": "4.2.4",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz",
@ -974,11 +851,6 @@
"entities": "^4.4.0"
}
},
"node_modules/http-cache-semantics": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz",
"integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ=="
},
"node_modules/http-proxy-agent": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz",
@ -991,18 +863,6 @@
"node": ">= 14"
}
},
"node_modules/http2-wrapper": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.0.tgz",
"integrity": "sha512-kZB0wxMo0sh1PehyjJUWRFEd99KC5TLjZ2cULC4f9iqJBAmKQQXEICjxl5iPJRwP40dpeHFqqhm7tYCvODpqpQ==",
"dependencies": {
"quick-lru": "^5.1.1",
"resolve-alpn": "^1.2.0"
},
"engines": {
"node": ">=10.19.0"
}
},
"node_modules/https-proxy-agent": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz",
@ -1088,11 +948,6 @@
"js-yaml": "bin/js-yaml.js"
}
},
"node_modules/json-buffer": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz",
"integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ=="
},
"node_modules/json-parse-even-better-errors": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@ -1107,14 +962,6 @@
"universalify": "^2.0.0"
}
},
"node_modules/keyv": {
"version": "4.5.2",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.2.tgz",
"integrity": "sha512-5MHbFaKn8cNSmVW7BYnijeAVlE4cYA/SVkifVgrh7yotnfhKmjuXpDKjrABLnT0SfHWV21P8ow07OGfRrNDg8g==",
"dependencies": {
"json-buffer": "3.0.1"
}
},
"node_modules/levn": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz",
@ -1132,17 +979,6 @@
"resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
"integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="
},
"node_modules/lowercase-keys": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz",
"integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ==",
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/lru-cache": {
"version": "7.18.3",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
@ -1170,17 +1006,6 @@
"node": ">= 0.6"
}
},
"node_modules/mimic-response": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz",
"integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg==",
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/mitt": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz",
@ -1223,17 +1048,6 @@
}
}
},
"node_modules/normalize-url": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.0.0.tgz",
"integrity": "sha512-uVFpKhj5MheNBJRTiMZ9pE/7hD1QTeEvugSJW/OmLzAp78PB5O6adfMNTvmfKhXBkvCzC+rqifWcVYpGFwTjnw==",
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
@ -1269,14 +1083,6 @@
"node": ">= 0.8.0"
}
},
"node_modules/p-cancelable": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz",
"integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw==",
"engines": {
"node": ">=12.20"
}
},
"node_modules/pac-proxy-agent": {
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz",
@ -1457,17 +1263,6 @@
}
}
},
"node_modules/quick-lru": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz",
"integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
@ -1489,11 +1284,6 @@
"node": ">=0.10.0"
}
},
"node_modules/resolve-alpn": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz",
"integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g=="
},
"node_modules/resolve-from": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
@ -1502,20 +1292,6 @@
"node": ">=4"
}
},
"node_modules/responselike": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz",
"integrity": "sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==",
"dependencies": {
"lowercase-keys": "^3.0.0"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
@ -1937,19 +1713,6 @@
"yargs": "17.7.1"
}
},
"@sindresorhus/is": {
"version": "5.4.1",
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.4.1.tgz",
"integrity": "sha512-axlrvsHlHlFmKKMEg4VyvMzFr93JWJj4eIfXY1STVuO2fsImCa7ncaiG5gC8HKOX590AW5RtRsC41/B+OfrSqw=="
},
"@szmarczak/http-timer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz",
"integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==",
"requires": {
"defer-to-connect": "^2.0.1"
}
},
"@types/got": {
"version": "9.6.12",
"resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz",
@ -1961,11 +1724,6 @@
"form-data": "^2.5.0"
}
},
"@types/http-cache-semantics": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.1.tgz",
"integrity": "sha512-SZs7ekbP8CN0txVG2xVRH6EgKmEm31BOxA07vkFaETzZz1xh+cbt8BcI0slpymvwhx5dlFnQG2rTlPVQn+iRPQ=="
},
"@types/jsonfile": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz",
@ -2114,25 +1872,6 @@
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
"integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ=="
},
"cacheable-lookup": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz",
"integrity": "sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w=="
},
"cacheable-request": {
"version": "10.2.10",
"resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-10.2.10.tgz",
"integrity": "sha512-v6WB+Epm/qO4Hdlio/sfUn69r5Shgh39SsE9DSd4bIezP0mblOlObI+I0kUEM7J0JFc+I7pSeMeYaOYtX1N/VQ==",
"requires": {
"@types/http-cache-semantics": "^4.0.1",
"get-stream": "^6.0.1",
"http-cache-semantics": "^4.1.1",
"keyv": "^4.5.2",
"mimic-response": "^4.0.0",
"normalize-url": "^8.0.0",
"responselike": "^3.0.0"
}
},
"callsites": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@ -2255,6 +1994,11 @@
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw=="
},
"csv-writer": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/csv-writer/-/csv-writer-1.6.0.tgz",
"integrity": "sha512-NOx7YDFWEsM/fTRAJjRpPp8t+MKRVvniAg9wQlUKx20MFrPs73WLJhFf5iteqrxNYnsy924K3Iroh3yNHeYd2g=="
},
"data-uri-to-buffer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz",
@ -2268,31 +2012,11 @@
"ms": "2.1.2"
}
},
"decompress-response": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
"requires": {
"mimic-response": "^3.1.0"
},
"dependencies": {
"mimic-response": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ=="
}
}
},
"deep-is": {
"version": "0.1.4",
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
"integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="
},
"defer-to-connect": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz",
"integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg=="
},
"degenerator": {
"version": "4.0.3",
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz",
@ -2460,11 +2184,6 @@
"mime-types": "^2.1.12"
}
},
"form-data-encoder": {
"version": "2.1.4",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-2.1.4.tgz",
"integrity": "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw=="
},
"fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
@ -2485,11 +2204,6 @@
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg=="
},
"get-stream": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
"integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg=="
},
"get-uri": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz",
@ -2526,24 +2240,6 @@
}
}
},
"got": {
"version": "13.0.0",
"resolved": "https://registry.npmjs.org/got/-/got-13.0.0.tgz",
"integrity": "sha512-XfBk1CxOOScDcMr9O1yKkNaQyy865NbYs+F7dr4H0LZMVgCj2Le59k6PqbNHoL5ToeaEQUYh6c6yMfVcc6SJxA==",
"requires": {
"@sindresorhus/is": "^5.2.0",
"@szmarczak/http-timer": "^5.0.1",
"cacheable-lookup": "^7.0.0",
"cacheable-request": "^10.2.8",
"decompress-response": "^6.0.0",
"form-data-encoder": "^2.1.2",
"get-stream": "^6.0.1",
"http2-wrapper": "^2.1.10",
"lowercase-keys": "^3.0.0",
"p-cancelable": "^3.0.0",
"responselike": "^3.0.0"
}
},
"graceful-fs": {
"version": "4.2.4",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz",
@ -2565,11 +2261,6 @@
"entities": "^4.4.0"
}
},
"http-cache-semantics": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz",
"integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ=="
},
"http-proxy-agent": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz",
@ -2579,15 +2270,6 @@
"debug": "^4.3.4"
}
},
"http2-wrapper": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.0.tgz",
"integrity": "sha512-kZB0wxMo0sh1PehyjJUWRFEd99KC5TLjZ2cULC4f9iqJBAmKQQXEICjxl5iPJRwP40dpeHFqqhm7tYCvODpqpQ==",
"requires": {
"quick-lru": "^5.1.1",
"resolve-alpn": "^1.2.0"
}
},
"https-proxy-agent": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz",
@ -2644,11 +2326,6 @@
"argparse": "^2.0.1"
}
},
"json-buffer": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz",
"integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ=="
},
"json-parse-even-better-errors": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@ -2663,14 +2340,6 @@
"universalify": "^2.0.0"
}
},
"keyv": {
"version": "4.5.2",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.2.tgz",
"integrity": "sha512-5MHbFaKn8cNSmVW7BYnijeAVlE4cYA/SVkifVgrh7yotnfhKmjuXpDKjrABLnT0SfHWV21P8ow07OGfRrNDg8g==",
"requires": {
"json-buffer": "3.0.1"
}
},
"levn": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz",
@ -2685,11 +2354,6 @@
"resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
"integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="
},
"lowercase-keys": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz",
"integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ=="
},
"lru-cache": {
"version": "7.18.3",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
@ -2708,11 +2372,6 @@
"mime-db": "1.52.0"
}
},
"mimic-response": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz",
"integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg=="
},
"mitt": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz",
@ -2741,11 +2400,6 @@
"whatwg-url": "^5.0.0"
}
},
"normalize-url": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.0.0.tgz",
"integrity": "sha512-uVFpKhj5MheNBJRTiMZ9pE/7hD1QTeEvugSJW/OmLzAp78PB5O6adfMNTvmfKhXBkvCzC+rqifWcVYpGFwTjnw=="
},
"nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
@ -2775,11 +2429,6 @@
"word-wrap": "~1.2.3"
}
},
"p-cancelable": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz",
"integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw=="
},
"pac-proxy-agent": {
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz",
@ -2912,11 +2561,6 @@
"ws": "8.13.0"
}
},
"quick-lru": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz",
"integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA=="
},
"readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
@ -2932,24 +2576,11 @@
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q=="
},
"resolve-alpn": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz",
"integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g=="
},
"resolve-from": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
"integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g=="
},
"responselike": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz",
"integrity": "sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==",
"requires": {
"lowercase-keys": "^3.0.0"
}
},
"safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",

View File

@ -15,6 +15,7 @@
"dependencies": {
"axios": "^1.4.0",
"cheerio": "^1.0.0-rc.12",
"csv-writer": "^1.6.0",
"fs-extra": "^11.1.1",
"jsonfile": "^6.1.0",
"puppeteer": "^20.7.2",

View File

@ -5,28 +5,30 @@ import fs from "fs";
import { getDomain } from "tldts";
import { Browser, launch } from "puppeteer";
import axios from "axios";
import { Dobey, LDJsonParser } from "./modules/websites";
import { Dobey, EP, LDJsonParser, Macrovet } from "./modules/websites";
import { createObjectCsvWriter } from "csv-writer";
import { CsvWriter } from "csv-writer/src/lib/csv-writer";
import { ObjectMap } from "csv-writer/src/lib/lang/object";
interface IProduct {
name: string;
blogUrl: string;
url: string;
domain: string;
inStock: boolean;
product_name: string;
product_url: string;
in_stock: boolean;
blog_url: string;
};
// Globals
let browser: Browser = null;
let sitemapUrl: string = "";
let allProducts: Array<IProduct> = [];
let allBlogUrls: Array<string> = [];
let writer: CsvWriter<ObjectMap<any>> = null;
// debugStart();
start();
async function debugStart() {
browser = await launch({ headless: false });
console.log(await crawlProductStock("https://www.kabels.nl/nl_nl/ewent-ew3240-draadloze-multi-connect-muis-600-2400-dpi-zwart-34863350.html"));
console.log(await crawlProductStock("https://www.macrovet.nl/K-Othrine-7-5-SC-1-liter-Insectenbestrijdingsmiddel/202561"));
}
async function start() {
@ -61,7 +63,7 @@ async function start() {
for (const sitemapUrl of sitemapUrls) {
console.log(`Crawling crawled sitemap: ${sitemapUrl}`);
console.log(`Crawling found sitemap: ${sitemapUrl}`);
const sitemapRequest = await axios.get(sitemapUrl);
const $ = cheerio.load(sitemapRequest.data, { xmlMode: true });
@ -76,15 +78,30 @@ async function start() {
console.log(`Crawled ${urls.length} urls from sitemaps.`);
console.log(`Starting crawler browser..`);
browser = await launch({ headless: true });
console.log(`Initialized puppeteer browser.`);
const today: Date = new Date();
const datecode: string = today.getFullYear().toString() + "-" + (today.getMonth() + 1).toString() + "-" + today.getDay().toString();
const domain: string = getDomain(sitemapUrl);
const file = `${domain}_${datecode}_report.csv`;
writer = createObjectCsvWriter({
path: file,
header: [
{id: 'domain', title: 'WEBSHOP_DOMAIN'},
{id: 'product_name', title: 'PRODUCT_NAME'},
{id: 'product_url', title: 'PRODUCT_URL'},
{id: 'in_stock', title: 'IN_STOCK'},
{id: 'blog_url', title: 'BLOG_URL'}
]
});
console.log(`Initialized csv writer.`);
console.log(`Initialization done.`);
console.log(`------------------------------------- \n`);
for (let url of urls) {
for (const url of urls) {
try {
allBlogUrls.push(url);
await crawlUrl(url);
} catch (error) {
continue;
@ -92,9 +109,9 @@ async function start() {
}
await browser.close();
await generateReport();
console.log(`Finished crawling all urls. Saved report to ${file}`);
} catch (error) {
console.error(`An Error Occured!`, error);
console.error(`A error occurred!`, error);
}
}
@ -115,21 +132,21 @@ async function crawlUrl(url: string) {
// Load html in cheerio object
const $ = cheerio.load(html);
let products: Array<any> = [];
const products: Array<IProduct> = [];
if ($(".row-products").length >= 1) {
console.log(`- Detected ${$(".row-products").length} content egg row type products.`);
$(".row-products .cegg-list-logo-title a").each((index, element) => {
let productUrl = $(element).attr("href");
let name = $(element).html().trim();
const productUrl = $(element).attr("href");
const name = $(element).html().trim();
products.push({
name: name,
blogUrl: url,
url: productUrl,
domain: "",
inStock: false,
product_name: name,
product_url: productUrl,
blog_url: url,
in_stock: false,
});
});
}
@ -142,47 +159,47 @@ async function crawlUrl(url: string) {
let name = $(element).find("h2").first().html().trim();
products.push({
name: name,
blogUrl: url,
url: productUrl,
domain: "",
inStock: false,
product_name: name,
product_url: productUrl,
blog_url: url,
in_stock: false,
});
});
}
console.log("- Checking product stocks...");
for (let index in products) {
for (const index in products) {
try {
let status = await crawlProductStock(products[index].url);
const status = await crawlProductStock(products[index].product_url);
products[index].domain = status[0];
products[index].url = status[1];
products[index].inStock = status[2];
products[index].product_url = status[1];
products[index].in_stock = status[2];
if (products[index].inStock) {
console.log(` [IN STOCK] ${products[index].name} - ${products[index].domain}`);
if (products[index].in_stock) {
console.log(` [IN STOCK] ${products[index].product_name} - ${products[index].domain}`);
} else {
console.log(` [OUT OF STOCK] ${products[index].name} - ${products[index].domain} - ${products[index].url}`);
console.log(` [OUT OF STOCK] ${products[index].product_name} - ${products[index].domain}`);
}
allProducts.push(products[index]);
// Write to csv
await writer.writeRecords([products[index]]);
} catch (error) {
console.log("- Skipping product...");
console.log(` [ERROR] ${products[index].product_name} - ${products[index].domain} - ${products[index].product_url}`, error);
continue;
}
}
console.log("");
console.log("");
} catch (error) {
console.log("-- Error while trying to crawl page! Skipping...");
}
}
async function crawlProductStock(url: string) {
async function crawlProductStock(url: string): Promise<[string, string, boolean]> {
try {
// Open new page and goto url
const page = await browser.newPage();
@ -218,7 +235,31 @@ async function crawlProductStock(url: string) {
case "petsonline.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "coolblue.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "bcc.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "azerty.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "cameranu.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "ep.nl":
return [domain, page.url(), await EP.check(html)];
case "alternate.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "macrovet.nl":
return [domain, page.url(), await Macrovet.check(html)];
case "ezydog.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
default:
console.error(`-- ${domain} is not an supported website! Cannot check stock!`);
return [domain, page.url(), false];
@ -227,58 +268,4 @@ async function crawlProductStock(url: string) {
console.error(error);
console.log("-- Error while trying to crawl page! Skipping...");
}
}
async function generateReport() {
console.log("Generating report...");
let today: Date = new Date();
let datecode: string = today.getFullYear().toString() + "-" + (today.getMonth() + 1).toString() + "-" + today.getDay().toString();
let domain: string = getDomain(sitemapUrl);
let file = `${domain}_${datecode}_report.txt`;
let totalProducts = 0;
let totalOutStock = 0;
for (let product of allProducts) {
totalProducts++;
if (product.inStock == false) {
totalOutStock++;
}
}
appendLn(file, `Content Egg Product Stock Crawler Report\n`);
appendLn(file, `Total Blog Urls: ${allBlogUrls.length}\n`);
appendLn(file, `Total Products: ${totalProducts}\n`);
appendLn(file, `Total Out of Stock: ${totalOutStock}\n`);
appendLn(file, ``);
let lastBlogUrl = "";
for (let product of allProducts) {
if (product.blogUrl != lastBlogUrl) {
appendLn(file, `\n\n`);
appendLn(file, `${product.blogUrl}\n`);
appendLn(file, `----------------------------------\n`);
}
if (product.inStock == false) {
appendLn(file, `${product.domain} - ${product.name} - ${product.url}\n`);
}
lastBlogUrl = product.blogUrl;
}
console.log("Report generated.");
}
function appendLn(file: string, text: string): Promise<void> {
return new Promise((resolve, reject) => {
fs.appendFile(file, text + "\n", (err) => {
if (err) {
reject(err);
} else {
resolve();
}
});
});
}

View File

@ -75,3 +75,37 @@ export namespace Dobey {
}
}
export namespace EP {
export async function check(html: string) {
try {
const $ = cheerio.load(html);
if ($(".product__info--stock_row").first().find("p").hasClass("is-green")) {
return true;
} else {
return false;
}
} catch (error) {
console.log(error);
console.error(`Error occured during stock check!`);
return false;
}
}
}
export namespace Macrovet {
export async function check(html: string) {
try {
const $ = cheerio.load(html);
if ($(".product-detail-price-container .product-available-stock").first().text().includes("0 in voorraad")) {
return false;
} else {
return true;
}
} catch (error) {
console.log(error);
console.error(`Error occured during stock check!`);
return false;
}
}
}