diff --git a/index.js b/index.js index c708c76..34b6eff 100755 --- a/index.js +++ b/index.js @@ -31,6 +31,8 @@ const cli = meow( Defaults to 3 e.g /blog/1, /blog/2, /blog/3 --reference-url Allows a reference URL to be used in testing + --strip-querystring Strips the query string from pages + --max-depth Maximum depth to crawl Examples $ backstop-crawl http://localhost @@ -49,6 +51,15 @@ if (cli.flags.limitSimilar) { } } +if (cli.flags.maxDepth) { + if (!Number.isInteger(cli.flags.maxDepth)) { + console.error( + `> Error: "${cli.flags.maxDepth}" isn't a valid depth` + ); + process.exit(1); + } +} + if (cli.flags.referenceUrl) { if (!validurl(cli.flags.referenceUrl)) { diff --git a/lib/crawl.js b/lib/crawl.js index e525ac7..78530cb 100644 --- a/lib/crawl.js +++ b/lib/crawl.js @@ -80,6 +80,14 @@ function crawl(url, flags) { crawler.scanSubdomains = true; } + if (flags.maxDepth) { + crawler.maxDepth = flags.maxDepth; + } + + if (flags.stripQuerystring) { + crawler.stripQuerystring = true; + } + // Skip this small blacklist of extensions crawler.addFetchCondition( queueItem => !queueItem.path.match(EXT_BLACKLIST)