-
-
Notifications
You must be signed in to change notification settings - Fork 7
/
download-images.js
86 lines (69 loc) · 2.26 KB
/
download-images.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
/**
Download images from a file containing URLs.
The file must contain a single URL per line.
Usage:
node ./download-images.js PATH/TO/IMAGE_NET/FILE.txt PATH/TO/OUTPUT/FOLDER
It will look something like this:
# create the output folders
mkdir -p datasets/hotdog/ && mkdir -p datasets/not_hotdog/
# process different input files
node ./download-images.js plants.txt datasets/not_hotdog/
node ./download-images.js hotdog.txt datasets/hotdog/
*/
const path = require("path");
const fs = require("fs");
const http = require("http");
const https = require("https");
const Stream = require("stream").Transform;
// Both command-line arguments are required. Fail early with a usage hint
// instead of letting the path module throw an opaque TypeError on `undefined`.
if (process.argv.length < 4) {
  console.error(
    "Usage: node ./download-images.js PATH/TO/URL/FILE.txt PATH/TO/OUTPUT/FOLDER"
  );
  process.exit(1);
}
// the first argument is the path to the file containing the URLs
// (path.resolve anchors relative paths at the current working directory and
// leaves absolute paths untouched, which path.join(cwd, arg) did not)
const inputFile = path.resolve(process.argv[2]);
// the second argument is the path to the folder in which we will store the downloaded images
const outputDir = path.resolve(process.argv[3]);
// we will prefix the image names by the name of the input file;
// passing the extension to basename strips it from the END of the name only
const prefix = path.basename(inputFile, path.extname(inputFile));
// read the input file and create an array of URLs
// (trimming also removes the "\r" left by Windows line endings,
// and the filter drops blank lines)
const imageURLS = fs
  .readFileSync(inputFile, "utf8")
  .split("\n")
  .map(url => url.trim())
  .filter(url => url);
// running totals, updated once per finished download attempt
let successCount = 0;
let errorCount = 0;
/**
 * Record the outcome of one download attempt and print the progress.
 * Once every URL has been accounted for, print the final tally and
 * terminate the process.
 *
 * @param {*} [err] - truthy when the attempt failed; omitted on success.
 */
function onEnd(err) {
  const failed = Boolean(err);
  if (failed) {
    errorCount += 1;
  } else {
    successCount += 1;
  }

  const done = successCount + errorCount;
  const total = imageURLS.length;
  const icon = failed ? "☠️" : "✅";
  console.log(`${icon} ${done} / ${total}`);

  // more attempts are still pending: nothing else to report yet
  if (done !== total) {
    return;
  }

  // last attempt finished: print the summary and stop the process
  console.log();
  console.log(`${errorCount} fail\n${successCount} success`);
  process.exit();
}
// Start one download per URL (all requests are issued without waiting for
// each other). Every URL reports to onEnd exactly once, either with an error
// or with nothing after the image has been written to outputDir.
imageURLS.forEach((url, i) => {
  // Several events can mark the end of the same attempt (request error,
  // response error, abort, end). Only the first one may be counted, otherwise
  // the totals drift and onEnd could exit before all downloads are done.
  let settled = false;
  const finish = err => {
    if (settled) {
      return;
    }
    settled = true;
    onEnd(err);
  };

  // check if we need an http or https request
  const { request } = url.startsWith("http:") ? http : https;
  // take the extension from the URL without its query string / fragment,
  // so "photo.jpg?size=large" is saved as ".jpg" and not ".jpg?size=large"
  const imageName = `${prefix}_${i}${path.extname(url.split(/[?#]/)[0])}`;

  let r;
  try {
    r = request(url, response => {
      // register first: an "error" event without a listener would crash the process
      response.on("error", finish);

      // anything but a 2xx answer (error pages, redirects) is not image data
      const { statusCode } = response;
      if (statusCode < 200 || statusCode >= 300) {
        // discard the body so the socket is released
        response.resume();
        finish(new Error(`Unexpected HTTP status ${statusCode} for ${url}`));
        return;
      }

      const chunks = [];
      response.on("data", chunk => {
        chunks.push(chunk);
      });
      // on older Node versions an interrupted response emits only "aborted"
      response.on("aborted", () => {
        finish(new Error(`Download aborted for ${url}`));
      });
      response.on("end", () => {
        if (settled) {
          // already counted as a failure: do not write a partial file
          return;
        }
        if (chunks.length === 0) {
          finish(new Error(`Empty response body for ${url}`));
          return;
        }
        try {
          fs.writeFileSync(
            path.join(outputDir, imageName),
            Buffer.concat(chunks)
          );
        } catch (err) {
          // e.g. missing output folder: count as failed instead of crashing
          finish(err);
          return;
        }
        finish();
      });
    });
  } catch (err) {
    // request() throws synchronously on malformed URLs or unsupported protocols
    finish(err);
    return;
  }
  r.on("error", finish);
  r.end();
});