Added more webshop support, updated system to use csv writer.

This commit is contained in:
Nick Leeman 2023-06-21 13:45:59 +02:00
parent 7c6025e5aa
commit 67a7b1d570
5 changed files with 125 additions and 470 deletions

2
.gitignore vendored
View File

@ -1,5 +1,7 @@
# ---> Node # ---> Node
# Logs # Logs
*.csv
logs logs
*.log *.log
npm-debug.log* npm-debug.log*

391
package-lock.json generated
View File

@ -11,8 +11,8 @@
"dependencies": { "dependencies": {
"axios": "^1.4.0", "axios": "^1.4.0",
"cheerio": "^1.0.0-rc.12", "cheerio": "^1.0.0-rc.12",
"csv-writer": "^1.6.0",
"fs-extra": "^11.1.1", "fs-extra": "^11.1.1",
"got": "^13.0.0",
"jsonfile": "^6.1.0", "jsonfile": "^6.1.0",
"puppeteer": "^20.7.2", "puppeteer": "^20.7.2",
"tldts": "^6.0.8", "tldts": "^6.0.8",
@ -84,28 +84,6 @@
} }
} }
}, },
"node_modules/@sindresorhus/is": {
"version": "5.4.1",
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.4.1.tgz",
"integrity": "sha512-axlrvsHlHlFmKKMEg4VyvMzFr93JWJj4eIfXY1STVuO2fsImCa7ncaiG5gC8HKOX590AW5RtRsC41/B+OfrSqw==",
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sindresorhus/is?sponsor=1"
}
},
"node_modules/@szmarczak/http-timer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz",
"integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==",
"dependencies": {
"defer-to-connect": "^2.0.1"
},
"engines": {
"node": ">=14.16"
}
},
"node_modules/@types/got": { "node_modules/@types/got": {
"version": "9.6.12", "version": "9.6.12",
"resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz", "resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz",
@ -117,11 +95,6 @@
"form-data": "^2.5.0" "form-data": "^2.5.0"
} }
}, },
"node_modules/@types/http-cache-semantics": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.1.tgz",
"integrity": "sha512-SZs7ekbP8CN0txVG2xVRH6EgKmEm31BOxA07vkFaETzZz1xh+cbt8BcI0slpymvwhx5dlFnQG2rTlPVQn+iRPQ=="
},
"node_modules/@types/jsonfile": { "node_modules/@types/jsonfile": {
"version": "6.1.1", "version": "6.1.1",
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz", "resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz",
@ -329,31 +302,6 @@
"node": "*" "node": "*"
} }
}, },
"node_modules/cacheable-lookup": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz",
"integrity": "sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w==",
"engines": {
"node": ">=14.16"
}
},
"node_modules/cacheable-request": {
"version": "10.2.10",
"resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-10.2.10.tgz",
"integrity": "sha512-v6WB+Epm/qO4Hdlio/sfUn69r5Shgh39SsE9DSd4bIezP0mblOlObI+I0kUEM7J0JFc+I7pSeMeYaOYtX1N/VQ==",
"dependencies": {
"@types/http-cache-semantics": "^4.0.1",
"get-stream": "^6.0.1",
"http-cache-semantics": "^4.1.1",
"keyv": "^4.5.2",
"mimic-response": "^4.0.0",
"normalize-url": "^8.0.0",
"responselike": "^3.0.0"
},
"engines": {
"node": ">=14.16"
}
},
"node_modules/callsites": { "node_modules/callsites": {
"version": "3.1.0", "version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@ -515,6 +463,11 @@
"url": "https://github.com/sponsors/fb55" "url": "https://github.com/sponsors/fb55"
} }
}, },
"node_modules/csv-writer": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/csv-writer/-/csv-writer-1.6.0.tgz",
"integrity": "sha512-NOx7YDFWEsM/fTRAJjRpPp8t+MKRVvniAg9wQlUKx20MFrPs73WLJhFf5iteqrxNYnsy924K3Iroh3yNHeYd2g=="
},
"node_modules/data-uri-to-buffer": { "node_modules/data-uri-to-buffer": {
"version": "5.0.1", "version": "5.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz",
@ -539,44 +492,11 @@
} }
} }
}, },
"node_modules/decompress-response": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
"dependencies": {
"mimic-response": "^3.1.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/decompress-response/node_modules/mimic-response": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/deep-is": { "node_modules/deep-is": {
"version": "0.1.4", "version": "0.1.4",
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
"integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==" "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="
}, },
"node_modules/defer-to-connect": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz",
"integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg==",
"engines": {
"node": ">=10"
}
},
"node_modules/degenerator": { "node_modules/degenerator": {
"version": "4.0.3", "version": "4.0.3",
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz", "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz",
@ -831,14 +751,6 @@
"node": ">= 0.12" "node": ">= 0.12"
} }
}, },
"node_modules/form-data-encoder": {
"version": "2.1.4",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-2.1.4.tgz",
"integrity": "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw==",
"engines": {
"node": ">= 14.17"
}
},
"node_modules/fs-constants": { "node_modules/fs-constants": {
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
@ -865,17 +777,6 @@
"node": "6.* || 8.* || >= 10.*" "node": "6.* || 8.* || >= 10.*"
} }
}, },
"node_modules/get-stream": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
"integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/get-uri": { "node_modules/get-uri": {
"version": "6.0.1", "version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz", "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz",
@ -919,30 +820,6 @@
"node": ">= 4.0.0" "node": ">= 4.0.0"
} }
}, },
"node_modules/got": {
"version": "13.0.0",
"resolved": "https://registry.npmjs.org/got/-/got-13.0.0.tgz",
"integrity": "sha512-XfBk1CxOOScDcMr9O1yKkNaQyy865NbYs+F7dr4H0LZMVgCj2Le59k6PqbNHoL5ToeaEQUYh6c6yMfVcc6SJxA==",
"dependencies": {
"@sindresorhus/is": "^5.2.0",
"@szmarczak/http-timer": "^5.0.1",
"cacheable-lookup": "^7.0.0",
"cacheable-request": "^10.2.8",
"decompress-response": "^6.0.0",
"form-data-encoder": "^2.1.2",
"get-stream": "^6.0.1",
"http2-wrapper": "^2.1.10",
"lowercase-keys": "^3.0.0",
"p-cancelable": "^3.0.0",
"responselike": "^3.0.0"
},
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sindresorhus/got?sponsor=1"
}
},
"node_modules/graceful-fs": { "node_modules/graceful-fs": {
"version": "4.2.4", "version": "4.2.4",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz",
@ -974,11 +851,6 @@
"entities": "^4.4.0" "entities": "^4.4.0"
} }
}, },
"node_modules/http-cache-semantics": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz",
"integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ=="
},
"node_modules/http-proxy-agent": { "node_modules/http-proxy-agent": {
"version": "7.0.0", "version": "7.0.0",
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz",
@ -991,18 +863,6 @@
"node": ">= 14" "node": ">= 14"
} }
}, },
"node_modules/http2-wrapper": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.0.tgz",
"integrity": "sha512-kZB0wxMo0sh1PehyjJUWRFEd99KC5TLjZ2cULC4f9iqJBAmKQQXEICjxl5iPJRwP40dpeHFqqhm7tYCvODpqpQ==",
"dependencies": {
"quick-lru": "^5.1.1",
"resolve-alpn": "^1.2.0"
},
"engines": {
"node": ">=10.19.0"
}
},
"node_modules/https-proxy-agent": { "node_modules/https-proxy-agent": {
"version": "7.0.0", "version": "7.0.0",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz",
@ -1088,11 +948,6 @@
"js-yaml": "bin/js-yaml.js" "js-yaml": "bin/js-yaml.js"
} }
}, },
"node_modules/json-buffer": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz",
"integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ=="
},
"node_modules/json-parse-even-better-errors": { "node_modules/json-parse-even-better-errors": {
"version": "2.3.1", "version": "2.3.1",
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@ -1107,14 +962,6 @@
"universalify": "^2.0.0" "universalify": "^2.0.0"
} }
}, },
"node_modules/keyv": {
"version": "4.5.2",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.2.tgz",
"integrity": "sha512-5MHbFaKn8cNSmVW7BYnijeAVlE4cYA/SVkifVgrh7yotnfhKmjuXpDKjrABLnT0SfHWV21P8ow07OGfRrNDg8g==",
"dependencies": {
"json-buffer": "3.0.1"
}
},
"node_modules/levn": { "node_modules/levn": {
"version": "0.3.0", "version": "0.3.0",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz",
@ -1132,17 +979,6 @@
"resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
"integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="
}, },
"node_modules/lowercase-keys": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz",
"integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ==",
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/lru-cache": { "node_modules/lru-cache": {
"version": "7.18.3", "version": "7.18.3",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
@ -1170,17 +1006,6 @@
"node": ">= 0.6" "node": ">= 0.6"
} }
}, },
"node_modules/mimic-response": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz",
"integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg==",
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/mitt": { "node_modules/mitt": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz", "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz",
@ -1223,17 +1048,6 @@
} }
} }
}, },
"node_modules/normalize-url": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.0.0.tgz",
"integrity": "sha512-uVFpKhj5MheNBJRTiMZ9pE/7hD1QTeEvugSJW/OmLzAp78PB5O6adfMNTvmfKhXBkvCzC+rqifWcVYpGFwTjnw==",
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/nth-check": { "node_modules/nth-check": {
"version": "2.1.1", "version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
@ -1269,14 +1083,6 @@
"node": ">= 0.8.0" "node": ">= 0.8.0"
} }
}, },
"node_modules/p-cancelable": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz",
"integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw==",
"engines": {
"node": ">=12.20"
}
},
"node_modules/pac-proxy-agent": { "node_modules/pac-proxy-agent": {
"version": "6.0.3", "version": "6.0.3",
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz",
@ -1457,17 +1263,6 @@
} }
} }
}, },
"node_modules/quick-lru": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz",
"integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/readable-stream": { "node_modules/readable-stream": {
"version": "3.6.2", "version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
@ -1489,11 +1284,6 @@
"node": ">=0.10.0" "node": ">=0.10.0"
} }
}, },
"node_modules/resolve-alpn": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz",
"integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g=="
},
"node_modules/resolve-from": { "node_modules/resolve-from": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
@ -1502,20 +1292,6 @@
"node": ">=4" "node": ">=4"
} }
}, },
"node_modules/responselike": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz",
"integrity": "sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==",
"dependencies": {
"lowercase-keys": "^3.0.0"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/safe-buffer": { "node_modules/safe-buffer": {
"version": "5.2.1", "version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
@ -1937,19 +1713,6 @@
"yargs": "17.7.1" "yargs": "17.7.1"
} }
}, },
"@sindresorhus/is": {
"version": "5.4.1",
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.4.1.tgz",
"integrity": "sha512-axlrvsHlHlFmKKMEg4VyvMzFr93JWJj4eIfXY1STVuO2fsImCa7ncaiG5gC8HKOX590AW5RtRsC41/B+OfrSqw=="
},
"@szmarczak/http-timer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-5.0.1.tgz",
"integrity": "sha512-+PmQX0PiAYPMeVYe237LJAYvOMYW1j2rH5YROyS3b4CTVJum34HfRvKvAzozHAQG0TnHNdUfY9nCeUyRAs//cw==",
"requires": {
"defer-to-connect": "^2.0.1"
}
},
"@types/got": { "@types/got": {
"version": "9.6.12", "version": "9.6.12",
"resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz", "resolved": "https://registry.npmjs.org/@types/got/-/got-9.6.12.tgz",
@ -1961,11 +1724,6 @@
"form-data": "^2.5.0" "form-data": "^2.5.0"
} }
}, },
"@types/http-cache-semantics": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@types/http-cache-semantics/-/http-cache-semantics-4.0.1.tgz",
"integrity": "sha512-SZs7ekbP8CN0txVG2xVRH6EgKmEm31BOxA07vkFaETzZz1xh+cbt8BcI0slpymvwhx5dlFnQG2rTlPVQn+iRPQ=="
},
"@types/jsonfile": { "@types/jsonfile": {
"version": "6.1.1", "version": "6.1.1",
"resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz", "resolved": "https://registry.npmjs.org/@types/jsonfile/-/jsonfile-6.1.1.tgz",
@ -2114,25 +1872,6 @@
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
"integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==" "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ=="
}, },
"cacheable-lookup": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-7.0.0.tgz",
"integrity": "sha512-+qJyx4xiKra8mZrcwhjMRMUhD5NR1R8esPkzIYxX96JiecFoxAXFuz/GpR3+ev4PE1WamHip78wV0vcmPQtp8w=="
},
"cacheable-request": {
"version": "10.2.10",
"resolved": "https://registry.npmjs.org/cacheable-request/-/cacheable-request-10.2.10.tgz",
"integrity": "sha512-v6WB+Epm/qO4Hdlio/sfUn69r5Shgh39SsE9DSd4bIezP0mblOlObI+I0kUEM7J0JFc+I7pSeMeYaOYtX1N/VQ==",
"requires": {
"@types/http-cache-semantics": "^4.0.1",
"get-stream": "^6.0.1",
"http-cache-semantics": "^4.1.1",
"keyv": "^4.5.2",
"mimic-response": "^4.0.0",
"normalize-url": "^8.0.0",
"responselike": "^3.0.0"
}
},
"callsites": { "callsites": {
"version": "3.1.0", "version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@ -2255,6 +1994,11 @@
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==" "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw=="
}, },
"csv-writer": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/csv-writer/-/csv-writer-1.6.0.tgz",
"integrity": "sha512-NOx7YDFWEsM/fTRAJjRpPp8t+MKRVvniAg9wQlUKx20MFrPs73WLJhFf5iteqrxNYnsy924K3Iroh3yNHeYd2g=="
},
"data-uri-to-buffer": { "data-uri-to-buffer": {
"version": "5.0.1", "version": "5.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz", "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz",
@ -2268,31 +2012,11 @@
"ms": "2.1.2" "ms": "2.1.2"
} }
}, },
"decompress-response": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
"requires": {
"mimic-response": "^3.1.0"
},
"dependencies": {
"mimic-response": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ=="
}
}
},
"deep-is": { "deep-is": {
"version": "0.1.4", "version": "0.1.4",
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
"integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==" "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="
}, },
"defer-to-connect": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz",
"integrity": "sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg=="
},
"degenerator": { "degenerator": {
"version": "4.0.3", "version": "4.0.3",
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz", "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-4.0.3.tgz",
@ -2460,11 +2184,6 @@
"mime-types": "^2.1.12" "mime-types": "^2.1.12"
} }
}, },
"form-data-encoder": {
"version": "2.1.4",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-2.1.4.tgz",
"integrity": "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw=="
},
"fs-constants": { "fs-constants": {
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
@ -2485,11 +2204,6 @@
"resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
"integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==" "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg=="
}, },
"get-stream": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz",
"integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg=="
},
"get-uri": { "get-uri": {
"version": "6.0.1", "version": "6.0.1",
"resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz", "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.1.tgz",
@ -2526,24 +2240,6 @@
} }
} }
}, },
"got": {
"version": "13.0.0",
"resolved": "https://registry.npmjs.org/got/-/got-13.0.0.tgz",
"integrity": "sha512-XfBk1CxOOScDcMr9O1yKkNaQyy865NbYs+F7dr4H0LZMVgCj2Le59k6PqbNHoL5ToeaEQUYh6c6yMfVcc6SJxA==",
"requires": {
"@sindresorhus/is": "^5.2.0",
"@szmarczak/http-timer": "^5.0.1",
"cacheable-lookup": "^7.0.0",
"cacheable-request": "^10.2.8",
"decompress-response": "^6.0.0",
"form-data-encoder": "^2.1.2",
"get-stream": "^6.0.1",
"http2-wrapper": "^2.1.10",
"lowercase-keys": "^3.0.0",
"p-cancelable": "^3.0.0",
"responselike": "^3.0.0"
}
},
"graceful-fs": { "graceful-fs": {
"version": "4.2.4", "version": "4.2.4",
"resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.4.tgz",
@ -2565,11 +2261,6 @@
"entities": "^4.4.0" "entities": "^4.4.0"
} }
}, },
"http-cache-semantics": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz",
"integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ=="
},
"http-proxy-agent": { "http-proxy-agent": {
"version": "7.0.0", "version": "7.0.0",
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz",
@ -2579,15 +2270,6 @@
"debug": "^4.3.4" "debug": "^4.3.4"
} }
}, },
"http2-wrapper": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-2.2.0.tgz",
"integrity": "sha512-kZB0wxMo0sh1PehyjJUWRFEd99KC5TLjZ2cULC4f9iqJBAmKQQXEICjxl5iPJRwP40dpeHFqqhm7tYCvODpqpQ==",
"requires": {
"quick-lru": "^5.1.1",
"resolve-alpn": "^1.2.0"
}
},
"https-proxy-agent": { "https-proxy-agent": {
"version": "7.0.0", "version": "7.0.0",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.0.tgz",
@ -2644,11 +2326,6 @@
"argparse": "^2.0.1" "argparse": "^2.0.1"
} }
}, },
"json-buffer": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz",
"integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ=="
},
"json-parse-even-better-errors": { "json-parse-even-better-errors": {
"version": "2.3.1", "version": "2.3.1",
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@ -2663,14 +2340,6 @@
"universalify": "^2.0.0" "universalify": "^2.0.0"
} }
}, },
"keyv": {
"version": "4.5.2",
"resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.2.tgz",
"integrity": "sha512-5MHbFaKn8cNSmVW7BYnijeAVlE4cYA/SVkifVgrh7yotnfhKmjuXpDKjrABLnT0SfHWV21P8ow07OGfRrNDg8g==",
"requires": {
"json-buffer": "3.0.1"
}
},
"levn": { "levn": {
"version": "0.3.0", "version": "0.3.0",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz",
@ -2685,11 +2354,6 @@
"resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
"integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="
}, },
"lowercase-keys": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-3.0.0.tgz",
"integrity": "sha512-ozCC6gdQ+glXOQsveKD0YsDy8DSQFjDTz4zyzEHNV5+JP5D62LmfDZ6o1cycFx9ouG940M5dE8C8CTewdj2YWQ=="
},
"lru-cache": { "lru-cache": {
"version": "7.18.3", "version": "7.18.3",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
@ -2708,11 +2372,6 @@
"mime-db": "1.52.0" "mime-db": "1.52.0"
} }
}, },
"mimic-response": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-4.0.0.tgz",
"integrity": "sha512-e5ISH9xMYU0DzrT+jl8q2ze9D6eWBto+I8CNpe+VI+K2J/F/k3PdkdTdz4wvGVH4NTpo+NRYTVIuMQEMMcsLqg=="
},
"mitt": { "mitt": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz", "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.0.tgz",
@ -2741,11 +2400,6 @@
"whatwg-url": "^5.0.0" "whatwg-url": "^5.0.0"
} }
}, },
"normalize-url": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-8.0.0.tgz",
"integrity": "sha512-uVFpKhj5MheNBJRTiMZ9pE/7hD1QTeEvugSJW/OmLzAp78PB5O6adfMNTvmfKhXBkvCzC+rqifWcVYpGFwTjnw=="
},
"nth-check": { "nth-check": {
"version": "2.1.1", "version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
@ -2775,11 +2429,6 @@
"word-wrap": "~1.2.3" "word-wrap": "~1.2.3"
} }
}, },
"p-cancelable": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz",
"integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw=="
},
"pac-proxy-agent": { "pac-proxy-agent": {
"version": "6.0.3", "version": "6.0.3",
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-6.0.3.tgz",
@ -2912,11 +2561,6 @@
"ws": "8.13.0" "ws": "8.13.0"
} }
}, },
"quick-lru": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz",
"integrity": "sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA=="
},
"readable-stream": { "readable-stream": {
"version": "3.6.2", "version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
@ -2932,24 +2576,11 @@
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
"integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==" "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q=="
}, },
"resolve-alpn": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/resolve-alpn/-/resolve-alpn-1.2.1.tgz",
"integrity": "sha512-0a1F4l73/ZFZOakJnQ3FvkJ2+gSTQWz/r2KE5OdDY0TxPm5h4GkqkWWfM47T7HsbnOtcJVEF4epCVy6u7Q3K+g=="
},
"resolve-from": { "resolve-from": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz",
"integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==" "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g=="
}, },
"responselike": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/responselike/-/responselike-3.0.0.tgz",
"integrity": "sha512-40yHxbNcl2+rzXvZuVkrYohathsSJlMTXKryG5y8uciHv1+xDLHQpgjG64JUO9nrEq2jGLH6IZ8BcZyw3wrweg==",
"requires": {
"lowercase-keys": "^3.0.0"
}
},
"safe-buffer": { "safe-buffer": {
"version": "5.2.1", "version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",

View File

@ -15,6 +15,7 @@
"dependencies": { "dependencies": {
"axios": "^1.4.0", "axios": "^1.4.0",
"cheerio": "^1.0.0-rc.12", "cheerio": "^1.0.0-rc.12",
"csv-writer": "^1.6.0",
"fs-extra": "^11.1.1", "fs-extra": "^11.1.1",
"jsonfile": "^6.1.0", "jsonfile": "^6.1.0",
"puppeteer": "^20.7.2", "puppeteer": "^20.7.2",

View File

@ -5,28 +5,30 @@ import fs from "fs";
import { getDomain } from "tldts"; import { getDomain } from "tldts";
import { Browser, launch } from "puppeteer"; import { Browser, launch } from "puppeteer";
import axios from "axios"; import axios from "axios";
import { Dobey, LDJsonParser } from "./modules/websites"; import { Dobey, EP, LDJsonParser, Macrovet } from "./modules/websites";
import { createObjectCsvWriter } from "csv-writer";
import { CsvWriter } from "csv-writer/src/lib/csv-writer";
import { ObjectMap } from "csv-writer/src/lib/lang/object";
interface IProduct { interface IProduct {
name: string;
blogUrl: string;
url: string;
domain: string; domain: string;
inStock: boolean; product_name: string;
product_url: string;
in_stock: boolean;
blog_url: string;
}; };
// Globals // Globals
let browser: Browser = null; let browser: Browser = null;
let sitemapUrl: string = ""; let sitemapUrl: string = "";
let allProducts: Array<IProduct> = []; let writer: CsvWriter<ObjectMap<any>> = null;
let allBlogUrls: Array<string> = [];
// debugStart(); // debugStart();
start(); start();
async function debugStart() { async function debugStart() {
browser = await launch({ headless: false }); browser = await launch({ headless: false });
console.log(await crawlProductStock("https://www.kabels.nl/nl_nl/ewent-ew3240-draadloze-multi-connect-muis-600-2400-dpi-zwart-34863350.html")); console.log(await crawlProductStock("https://www.macrovet.nl/K-Othrine-7-5-SC-1-liter-Insectenbestrijdingsmiddel/202561"));
} }
async function start() { async function start() {
@ -61,7 +63,7 @@ async function start() {
for (const sitemapUrl of sitemapUrls) { for (const sitemapUrl of sitemapUrls) {
console.log(`Crawling crawled sitemap: ${sitemapUrl}`); console.log(`Crawling found sitemap: ${sitemapUrl}`);
const sitemapRequest = await axios.get(sitemapUrl); const sitemapRequest = await axios.get(sitemapUrl);
const $ = cheerio.load(sitemapRequest.data, { xmlMode: true }); const $ = cheerio.load(sitemapRequest.data, { xmlMode: true });
@ -76,15 +78,30 @@ async function start() {
console.log(`Crawled ${urls.length} urls from sitemaps.`); console.log(`Crawled ${urls.length} urls from sitemaps.`);
console.log(`Starting crawler browser..`);
browser = await launch({ headless: true }); browser = await launch({ headless: true });
console.log(`Initialized puppeteer browser.`);
const today: Date = new Date();
const datecode: string = today.getFullYear().toString() + "-" + (today.getMonth() + 1).toString() + "-" + today.getDay().toString();
const domain: string = getDomain(sitemapUrl);
const file = `${domain}_${datecode}_report.csv`;
writer = createObjectCsvWriter({
path: file,
header: [
{id: 'domain', title: 'WEBSHOP_DOMAIN'},
{id: 'product_name', title: 'PRODUCT_NAME'},
{id: 'product_url', title: 'PRODUCT_URL'},
{id: 'in_stock', title: 'IN_STOCK'},
{id: 'blog_url', title: 'BLOG_URL'}
]
});
console.log(`Initialized csv writer.`);
console.log(`Initialization done.`); console.log(`Initialization done.`);
console.log(`------------------------------------- \n`); console.log(`------------------------------------- \n`);
for (let url of urls) { for (const url of urls) {
try { try {
allBlogUrls.push(url);
await crawlUrl(url); await crawlUrl(url);
} catch (error) { } catch (error) {
continue; continue;
@ -92,9 +109,9 @@ async function start() {
} }
await browser.close(); await browser.close();
await generateReport(); console.log(`Finished crawling all urls. Saved report to ${file}`);
} catch (error) { } catch (error) {
console.error(`An Error Occured!`, error); console.error(`A error occurred!`, error);
} }
} }
@ -115,21 +132,21 @@ async function crawlUrl(url: string) {
// Load html in cheerio object // Load html in cheerio object
const $ = cheerio.load(html); const $ = cheerio.load(html);
let products: Array<any> = []; const products: Array<IProduct> = [];
if ($(".row-products").length >= 1) { if ($(".row-products").length >= 1) {
console.log(`- Detected ${$(".row-products").length} content egg row type products.`); console.log(`- Detected ${$(".row-products").length} content egg row type products.`);
$(".row-products .cegg-list-logo-title a").each((index, element) => { $(".row-products .cegg-list-logo-title a").each((index, element) => {
let productUrl = $(element).attr("href"); const productUrl = $(element).attr("href");
let name = $(element).html().trim(); const name = $(element).html().trim();
products.push({ products.push({
name: name,
blogUrl: url,
url: productUrl,
domain: "", domain: "",
inStock: false, product_name: name,
product_url: productUrl,
blog_url: url,
in_stock: false,
}); });
}); });
} }
@ -142,47 +159,47 @@ async function crawlUrl(url: string) {
let name = $(element).find("h2").first().html().trim(); let name = $(element).find("h2").first().html().trim();
products.push({ products.push({
name: name,
blogUrl: url,
url: productUrl,
domain: "", domain: "",
inStock: false, product_name: name,
product_url: productUrl,
blog_url: url,
in_stock: false,
}); });
}); });
} }
console.log("- Checking product stocks..."); console.log("- Checking product stocks...");
for (let index in products) { for (const index in products) {
try { try {
let status = await crawlProductStock(products[index].url); const status = await crawlProductStock(products[index].product_url);
products[index].domain = status[0]; products[index].domain = status[0];
products[index].url = status[1]; products[index].product_url = status[1];
products[index].inStock = status[2]; products[index].in_stock = status[2];
if (products[index].inStock) { if (products[index].in_stock) {
console.log(` [IN STOCK] ${products[index].name} - ${products[index].domain}`); console.log(` [IN STOCK] ${products[index].product_name} - ${products[index].domain}`);
} else { } else {
console.log(` [OUT OF STOCK] ${products[index].name} - ${products[index].domain} - ${products[index].url}`); console.log(` [OUT OF STOCK] ${products[index].product_name} - ${products[index].domain}`);
} }
allProducts.push(products[index]); // Write to csv
await writer.writeRecords([products[index]]);
} catch (error) { } catch (error) {
console.log("- Skipping product..."); console.log(` [ERROR] ${products[index].product_name} - ${products[index].domain} - ${products[index].product_url}`, error);
continue; continue;
} }
} }
console.log(""); console.log("");
console.log(""); console.log("");
} catch (error) { } catch (error) {
console.log("-- Error while trying to crawl page! Skipping..."); console.log("-- Error while trying to crawl page! Skipping...");
} }
} }
async function crawlProductStock(url: string) { async function crawlProductStock(url: string): Promise<[string, string, boolean]> {
try { try {
// Open new page and goto url // Open new page and goto url
const page = await browser.newPage(); const page = await browser.newPage();
@ -218,7 +235,31 @@ async function crawlProductStock(url: string) {
case "petsonline.nl": case "petsonline.nl":
return [domain, page.url(), await LDJsonParser.check(html)]; return [domain, page.url(), await LDJsonParser.check(html)];
case "coolblue.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "bcc.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "azerty.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "cameranu.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "ep.nl":
return [domain, page.url(), await EP.check(html)];
case "alternate.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
case "macrovet.nl":
return [domain, page.url(), await Macrovet.check(html)];
case "ezydog.nl":
return [domain, page.url(), await LDJsonParser.check(html)];
default: default:
console.error(`-- ${domain} is not an supported website! Cannot check stock!`); console.error(`-- ${domain} is not an supported website! Cannot check stock!`);
return [domain, page.url(), false]; return [domain, page.url(), false];
@ -227,58 +268,4 @@ async function crawlProductStock(url: string) {
console.error(error); console.error(error);
console.log("-- Error while trying to crawl page! Skipping..."); console.log("-- Error while trying to crawl page! Skipping...");
} }
}
/**
 * Writes the plain-text stock report for the crawl that just finished.
 *
 * Reads the module-level `allProducts`, `allBlogUrls` and `sitemapUrl`, and
 * appends the report to `<domain>_<year>-<month>-<day>_report.txt` (a relative
 * path). The report starts with summary counters, followed by one section per
 * blog URL that lists only the products found to be out of stock.
 *
 * @returns A promise that settles once the report text has been appended; it
 *          rejects if the underlying append fails.
 */
async function generateReport(): Promise<void> {
    console.log("Generating report...");

    const today = new Date();
    // getDate() yields the day of the month. getDay() would yield the weekday
    // index (0-6), which produced wrong and colliding report file names.
    const datecode = `${today.getFullYear()}-${today.getMonth() + 1}-${today.getDate()}`;
    const domain = getDomain(sitemapUrl);
    const file = `${domain}_${datecode}_report.txt`;

    const totalProducts = allProducts.length;
    const totalOutStock = allProducts.filter((product) => product.inStock === false).length;

    // Each entry corresponds to one line-append of the report; entries that end
    // in "\n" deliberately leave a blank line behind them.
    const entries: string[] = [
        `Content Egg Product Stock Crawler Report\n`,
        `Total Blog Urls: ${allBlogUrls.length}\n`,
        `Total Products: ${totalProducts}\n`,
        `Total Out of Stock: ${totalOutStock}\n`,
        ``,
    ];

    let lastBlogUrl = "";
    for (const product of allProducts) {
        // Products are grouped by blog page: emit a section header whenever the
        // blog URL differs from the one of the previous product.
        if (product.blogUrl !== lastBlogUrl) {
            entries.push(`\n\n`, `${product.blogUrl}\n`, `----------------------------------\n`);
        }

        if (product.inStock === false) {
            entries.push(`${product.domain} - ${product.name} - ${product.url}\n`);
        }

        lastBlogUrl = product.blogUrl;
    }

    // appendLn terminates the text with "\n", so joining the entries with "\n"
    // produces exactly the bytes of one appendLn call per entry, while a single
    // awaited write keeps the order deterministic and surfaces I/O errors to
    // the caller instead of leaving unhandled promise rejections.
    await appendLn(file, entries.join("\n"));

    console.log("Report generated.");
}
/**
 * Appends `text` plus a trailing newline to `file`.
 *
 * @param file Path of the file to append to.
 * @param text Content of the line, without the terminating newline.
 * @returns A promise that resolves once the data has been appended and
 *          rejects with the file-system error otherwise.
 */
function appendLn(file: string, text: string): Promise<void> {
    const line = text + "\n";
    return fs.promises.appendFile(file, line);
}

View File

@ -75,3 +75,37 @@ export namespace Dobey {
} }
} }
export namespace EP {
    /**
     * Stock check for EP product pages.
     *
     * Looks at the `<p>` elements inside the first `.product__info--stock_row`
     * element and treats the product as in stock when one of them carries the
     * `is-green` class.
     *
     * @param html Raw HTML of the product page.
     * @returns `true` when the marker class is present, `false` when it is
     *          missing or when inspecting the markup throws.
     */
    export async function check(html: string) {
        try {
            const $ = cheerio.load(html);
            const stockLabel = $(".product__info--stock_row").first().find("p");

            return stockLabel.hasClass("is-green");
        } catch (error) {
            console.log(error);
            console.error(`Error occured during stock check!`);
            return false;
        }
    }
}
export namespace Macrovet {
    // Matches a stock counter of exactly zero. The word boundary keeps counts
    // that merely end in zero ("10 in voorraad", "100 in voorraad") from being
    // mistaken for "0 in voorraad".
    const ZERO_STOCK_PATTERN = /\b0 in voorraad/;

    /**
     * Stock check for Macrovet product pages.
     *
     * Reads the text of the first `.product-available-stock` element inside
     * `.product-detail-price-container` and reports the product as out of
     * stock only when that text states a count of exactly zero.
     *
     * @param html Raw HTML of the product page.
     * @returns `false` when the stock text reads "0 in voorraad" or when
     *          inspecting the markup throws; `true` otherwise (including when
     *          the stock element is absent, as before).
     */
    export async function check(html: string): Promise<boolean> {
        try {
            const $ = cheerio.load(html);
            const stockText = $(".product-detail-price-container .product-available-stock").first().text();

            return !ZERO_STOCK_PATTERN.test(stockText);
        } catch (error) {
            console.log(error);
            console.error(`Error occured during stock check!`);
            return false;
        }
    }
}