From b21480a69501e8d4ac05c1881c8edb5b1ba8833b Mon Sep 17 00:00:00 2001
From: William Hurley
Date: Thu, 25 Jun 2020 10:21:29 -0400
Subject: [PATCH 1/2] Adding --max-depth and --strip-querystring

---
 index.js     | 11 +++++++++++
 lib/crawl.js |  8 ++++++++
 2 files changed, 19 insertions(+)

diff --git a/index.js b/index.js
index c708c76..34b6eff 100755
--- a/index.js
+++ b/index.js
@@ -31,6 +31,8 @@ const cli = meow(
                         Defaults to 3
                         e.g /blog/1, /blog/2, /blog/3
         --reference-url Allows a reference URL to be used in testing
+        --strip-querystring Strips the query string from pages
+        --max-depth     Maximum depth to crawl
 
     Examples
         $ backstop-crawl http://localhost
@@ -49,6 +51,15 @@ if (cli.flags.limitSimilar) {
     }
 }
 
+if (cli.flags.maxDepth) {
+    if (!Number.isInteger(cli.flags.maxDepth)) {
+        console.error(
+            `> Error: "${cli.flags.maxDepth}" isn't a valid depth`
+        );
+        process.exit(1);
+    }
+}
+
 if (cli.flags.referenceUrl) {
     if (!validurl(cli.flags.referenceUrl)) {

diff --git a/lib/crawl.js b/lib/crawl.js
index e525ac7..d76b325 100644
--- a/lib/crawl.js
+++ b/lib/crawl.js
@@ -80,6 +80,14 @@ function crawl(url, flags) {
         crawler.scanSubdomains = true;
     }
 
+    if (flags.maxDepth) {
+        crawler.maxDepth = flags.maxDepth;
+    }
+
+    if (flags.stripQuerystring) {
+        crawler.stripQueryString = true;
+    }
+
     // Skip this small blacklist of extensions
     crawler.addFetchCondition(
         queueItem => !queueItem.path.match(EXT_BLACKLIST)

From ce4643d36f512a52c0473df7e309ce0d88160455 Mon Sep 17 00:00:00 2001
From: William Hurley
Date: Thu, 25 Jun 2020 10:50:26 -0400
Subject: [PATCH 2/2] Fixing parameter name

---
 lib/crawl.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/crawl.js b/lib/crawl.js
index d76b325..78530cb 100644
--- a/lib/crawl.js
+++ b/lib/crawl.js
@@ -85,7 +85,7 @@ function crawl(url, flags) {
     }
 
     if (flags.stripQuerystring) {
-        crawler.stripQueryString = true;
+        crawler.stripQuerystring = true;
     }
 
     // Skip this small blacklist of extensions
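
Usage note (not part of the diff): with both commits applied, the new flags would be passed on the command line in the same style as the existing Examples in the help text; the URL and depth value below are illustrative only.

    $ backstop-crawl http://localhost --max-depth=2 --strip-querystring

--max-depth is validated as an integer in index.js and forwarded to the crawler instance as crawler.maxDepth; --strip-querystring sets crawler.stripQuerystring, the property name that the second commit corrects from crawler.stripQueryString.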