build setup & clean

This commit is contained in:
Nick Leeman 2023-06-30 14:16:16 +02:00
parent a0cb920091
commit c1162550f4
6 changed files with 2254 additions and 15 deletions

3
.gitignore vendored
View File

@@ -1,5 +1,8 @@
# ---> Node
# Logs
build/**
compiled/**
*.csv
logs

2223
package-lock.json generated

File diff suppressed because it is too large. (Load Diff)

View File

@@ -2,9 +2,21 @@
"name": "@aterve/contenteggstockcrawler",
"version": "1.0.2",
"description": "",
"main": "./dist/core.js",
"main": "build/cli.js",
"bin": {
"crawler": "build/cli.js"
},
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
"test": "echo \"Error: no test specified\" && exit 1",
"package-linux": "pkg . --targets node18-linux-x64 --output compiled/crawler-linux64",
"package-win": "pkg . --targets node18-windows-x64 --output compiled/crawler-win64",
"build": "tsc"
},
"pkg": {
"scripts": "build/**/*.js",
"assets": [
"node_modules/**"
]
},
"repository": {
"type": "git",
@@ -25,6 +37,7 @@
},
"devDependencies": {
"@types/got": "^9.6.12",
"@types/jsonfile": "^6.1.1"
"@types/jsonfile": "^6.1.1",
"pkg": "^5.8.1"
}
}

View File

@@ -1,6 +1,6 @@
import jsonfile from "jsonfile";
import cheerio from "cheerio";
import cheerio, { CheerioAPI } from "cheerio";
import fs from "fs";
import { getDomain } from "tldts";
import { Browser, launch } from "puppeteer";
@@ -41,11 +41,17 @@ async function start() {
let params = process.argv.slice(2);
sitemapUrl = params[0];
// Crawl Sitemap
console.log(`Crawling input sitemap: ${sitemapUrl}`);
const sitemapRequest = await axios.get(sitemapUrl);
const $ = cheerio.load(sitemapRequest.data, { xmlMode: true });
let $: CheerioAPI = null;
try {
// Crawl Sitemap
console.log(`Crawling input sitemap: ${sitemapUrl}`);
const sitemapRequest = await axios.get(sitemapUrl);
$ = cheerio.load(sitemapRequest.data, { xmlMode: true });
} catch (error) {
console.log("[ERROR] Error occurred crawling main sitemap.")
process.exit(0);
}
$('loc').each(function() {
const url = $(this).text();

View File

@@ -22,14 +22,10 @@ export namespace LDJsonParser {
$('script[type="application/ld+json"]').each((index, element) => {
snippets.push($(element).html());
});
console.log("Found " + snippets.length + " snippets");
for (const snippet of snippets) {
try {
let json = JSON.parse(snippet);
console.log(json);
if (json["@type"]) {
if (json["@type"].includes("Product")) {

View File

@@ -6,7 +6,7 @@
"noImplicitAny": true,
"moduleResolution": "node",
"sourceMap": true,
"outDir": "dist",
"outDir": "build",
"baseUrl": ".",
"paths": {
"*": ["node_modules/*", "src/types/*"]