commit fe05e9966a69342bcfd82b0bf81e7ddcbbaa3999
Author: Agastya Chandrakant <me@hanabi.in>
Date: Sun, 24 Jan 2021 19:12:43 +0530
refactored code
Diffstat:
6 files changed, 227 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+node_modules
+.env
+data.csv
+original-data.csv
+*txt
+\ No newline at end of file
diff --git a/helperFns.js b/helperFns.js
@@ -0,0 +1,136 @@
+const fetch = require("node-fetch");
+const Wikiapi = require("wikiapi");
+
+const { promisify } = require("util");
+const fs = require("fs");
+
+const readFile = promisify(fs.readFile);
+const appendFile = promisify(fs.appendFile);
+
+const {
+ author,
+ cats,
+ errFile,
+ license,
+ LR,
+ params,
+ statusFile,
+ API,
+ PASSWORD,
+ USERNAME,
+ WIKI_API,
+} = require("./utils.js");
+
+/**
+ * Format an art object's dating as an `{{other date|~|…|…}}` template string.
+ *
+ * @param {object} [dating] - Object that may carry `yearEarly` and `yearLate`.
+ * @returns {string} The template text; a missing or falsy year is rendered as
+ *   an empty field.
+ */
+function getDate(dating) {
+  // Tolerates a missing `dating` object and falls back to "" for falsy years.
+  const yearOrBlank = (key) => dating?.[key] || "";
+  return `{{other date|~|${yearOrBlank("yearEarly")}|${yearOrBlank("yearLate")}}}`;
+}
+
+/**
+ * Append the license section and the category links to a file page's wikitext.
+ *
+ * @param {string} [pageContent=''] - Existing wikitext of the file page.
+ * @param {string[]} [moreCats=[]] - Collection names; each one becomes a
+ *   "Uncategorised images of the Rijksmuseum (<name>)" category link.
+ * @returns {string} `pageContent` followed by the license header, `license`,
+ *   `LR`, the fixed `cats` and one category line per entry of `moreCats`.
+ */
+function updateContent(pageContent = '', moreCats = []) {
+  // Default parameters only replace `undefined`. An explicit `null` used to
+  // crash on `.reduce` (moreCats) or put the text "null" on the page
+  // (pageContent), so both are normalised here.
+  const baseContent = pageContent ?? '';
+  const collections = Array.isArray(moreCats) ? moreCats : [];
+  const moreCatsStr = collections
+    .map((name) => `[[Category:Uncategorised images of the Rijksmuseum (${name})]]\n`)
+    .join('');
+  const finalCats = `${cats}\n${moreCatsStr}`;
+  return `${baseContent}\n== {{int:license-header}} ==\n${license}\n${LR}\n\n${finalCats}`;
+}
+
+/**
+ * Choose the description text for an upload.
+ *
+ * @param {object} [artObject] - Object that may carry a `description`.
+ * @param {string} collectionID - Identifier used in the fallback text.
+ * @returns {string} `artObject.description` when it is truthy, otherwise
+ *   "Collection <collectionID> of the Rijksmuseum".
+ */
+function getDesc(artObject, collectionID) {
+  const provided = artObject?.description;
+  if (provided) {
+    return provided;
+  }
+  return `Collection ${collectionID} of the Rijksmuseum`;
+}
+
+/**
+ * Build the file title (without extension) for an art object.
+ *
+ * @param {object} [artObject] - Object that may carry `longTitle` and/or `title`;
+ *   `longTitle` is preferred.
+ * @param {string} collectionID - Identifier appended to the title.
+ * @returns {string} "<longTitle or title> <collectionID> - Rijksmuseum" with
+ *   every "[" and "]" removed, or just "<collectionID> - Rijksmuseum" when the
+ *   object has no usable title.
+ */
+function getTitle(artObject, collectionID) {
+  // Fall back to an empty prefix: appending to `undefined` used to produce
+  // titles that literally started with "undefined".
+  const baseTitle = artObject?.longTitle || artObject?.title || '';
+  const prefix = baseTitle ? `${baseTitle} ` : '';
+  // Square brackets are stripped from the whole title, as before.
+  // NOTE(review): presumably because they are not valid in wiki page titles - confirm.
+  return `${prefix}${collectionID} - Rijksmuseum`.replace(/[\[\]]/g, '');
+}
+
+/**
+ * Assemble the options object that `extractIdAndUpload` hands to `wiki.upload`.
+ *
+ * @param {string} media_url - URL of the image to upload.
+ * @param {string} filename - Target file name, including extension.
+ * @param {string} date - Date text for the page.
+ * @param {string} description - Description text for the page.
+ * @param {string} source - Source text for the page.
+ * @returns {{media_url: string, comment: string, filename: string, text: object}}
+ *   The upload options; `text` bundles the module-level `author` with `date`,
+ *   `description` and `source`.
+ */
+function getUploadObj(media_url, filename, date, description, source) {
+  const text = { author, date, description, source };
+  // `ignorewarnings: 1` stays disabled, exactly as in the previous version.
+  const uploadObj = {
+    media_url,
+    comment: "uploaded using API",
+    filename,
+    text,
+  };
+  return uploadObj;
+}
+
+/**
+ * Derive a file extension (with leading dot) from a MIME type such as
+ * "image/jpeg". Returns "" when the type is missing or has no subtype.
+ */
+function extFromMimeType(mimeType) {
+  if (typeof mimeType !== "string") return "";
+  // Drop optional parameters ("image/jpeg; charset=binary") before splitting.
+  const subtype = (mimeType.split(";")[0].split("/")[1] || "").trim();
+  return subtype ? `.${subtype}` : "";
+}
+
+/**
+ * Process one CSV line of the form "<collectionID>,<source>[,...]".
+ *
+ * Fetches the art object from the Rijksmuseum API, rejects objects that have a
+ * copyright holder or no image, uploads the image through Wikiapi, appends the
+ * license section and categories to the new file page, and records
+ * "<collectionID>,DONE" in `statusFile`.
+ *
+ * Errors raised while processing are caught and appended to `errFile`, so one
+ * bad line does not stop the caller from handling the next one. Blank lines
+ * are skipped without logging.
+ *
+ * @param {string} line - One line of the input CSV.
+ * @returns {Promise<void>}
+ */
+async function extractIdAndUpload(line) {
+  try {
+    // A trailing newline in the CSV yields an empty last line, and CRLF files
+    // leave a "\r" on every line; trimming covers both cases.
+    const trimmedLine = line.trim();
+    if (!trimmedLine) return;
+
+    const [colID, source] = trimmedLine.split(",");
+    const collectionID = stripBOM(colID).trim();
+
+    const rijksAPIresult = await fetch(`${API}${collectionID}${params}`);
+    const { artObject } = await rijksAPIresult.json();
+    if (!artObject) {
+      throw new Error(`${collectionID},WRONG_RIJKS_API_RES\n`);
+    }
+
+    const { copyrightHolder, hasImage, webImage } = artObject;
+    const media_url = webImage?.url;
+    if (copyrightHolder || !hasImage || !media_url) {
+      throw new Error(`${collectionID},COPYRIGHTED or MISSING IMAGE\n`);
+    }
+
+    const description = getDesc(artObject, collectionID);
+    const title = getTitle(artObject, collectionID);
+    const date = getDate(artObject.dating);
+
+    // The image is downloaded here only to learn its MIME type; the upload
+    // itself is given `media_url`.
+    const media = await fetch(media_url);
+    const media_blob = await media.blob();
+    // `"." + undefined` is the truthy string ".undefined", so the former
+    // `!fileExt` guard could never fire. Validate the subtype instead.
+    const fileExt = extFromMimeType(media_blob?.type);
+    if (!fileExt) {
+      throw new Error(`${collectionID},MISSING EXT\n`);
+    }
+    const filename = title + fileExt;
+
+    const wiki = new Wikiapi(WIKI_API);
+    await wiki.login(USERNAME, PASSWORD);
+    await wiki.upload(
+      getUploadObj(media_url, filename, date, description, source)
+    );
+
+    const fileTitle = "File:" + filename;
+    const pageData = await wiki.page(fileTitle);
+    const updatedContent = updateContent(
+      pageData?.wikitext,
+      artObject.objectCollection ?? []
+    );
+    await wiki.edit_page(fileTitle, updatedContent);
+
+    await appendFile(statusFile, `${collectionID},DONE\n`);
+  } catch (err) {
+    let content = err.toString();
+    if (!content.endsWith("\n")) content += "\n";
+    await appendFile(errFile, content);
+  }
+}
+
+/**
+ * Remove a leading byte-order mark (U+FEFF) from a string.
+ *
+ * @param {string} [string=''] - Text that may start with a BOM.
+ * @returns {string} The text without its first character when that character
+ *   is U+FEFF; otherwise the text unchanged.
+ */
+function stripBOM(string = '') {
+  const BOM_CODE = 0xFEFF;
+  if (string.charCodeAt(0) !== BOM_CODE) {
+    return string;
+  }
+  return string.slice(1);
+}
+
+module.exports = {
+ appendFile,
+ extractIdAndUpload,
+ getDate,
+ getDesc,
+ getTitle,
+ getUploadObj,
+ readFile,
+ updateContent,
+};
diff --git a/index.js b/index.js
@@ -0,0 +1,19 @@
+const { appendFile, extractIdAndUpload, readFile } = require("./helperFns.js");
+const { dataFile, errFile, statusFile } = require("./utils.js");
+
+/**
+ * Read the CSV at `dataFile` and process its lines one at a time.
+ *
+ * A failure to read the file (or any error that escapes a line's own error
+ * handling) is appended to `errFile`. "ALL,DONE" is appended to `statusFile`
+ * once processing has finished, whether it succeeded or failed.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+  try {
+    const CSVData = await readFile(dataFile, { encoding: "utf8" });
+    // Accept both LF and CRLF line endings.
+    const CSVLines = CSVData.split(/\r?\n/);
+    // `forEach` ignores the promise returned by an async callback, so the
+    // "ALL,DONE" marker used to be written before any upload had finished and
+    // every line was fired at the remote APIs at once. Awaiting inside a
+    // for...of loop handles the lines strictly in order.
+    for (const line of CSVLines) {
+      // Skip blank lines, e.g. the empty one after a trailing newline.
+      if (!line.trim()) continue;
+      await extractIdAndUpload(line);
+    }
+  } catch (err) {
+    await appendFile(errFile, `${err.toString()}\n`);
+  } finally {
+    await appendFile(statusFile, "ALL,DONE\n");
+  }
+}
+
+// If writing the log files itself fails, report it instead of leaving an
+// unhandled promise rejection.
+main().catch((err) => {
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/package.json b/package.json
@@ -0,0 +1,7 @@
+{
+ "dependencies": {
+ "dotenv": "^8.2.0",
+ "node-fetch": "^2.6.1",
+ "wikiapi": "^1.14.0"
+ }
+}
diff --git a/utils.js b/utils.js
@@ -0,0 +1,34 @@
+const dotenv = require("dotenv");
+const path = require("path");
+
+// Load .env into process.env. `parsed` holds the key/value pairs read from the
+// file; it is absent when the file could not be loaded.
+const { parsed: envFile = {} } = dotenv.config();
+const { API_KEY, PASSWORD } = process.env;
+// dotenv does not overwrite variables that already exist in the environment,
+// and USERNAME is often pre-set by the operating system or shell. Prefer the
+// value from .env so the wiki login does not silently use the OS account name;
+// fall back to the environment when .env does not define it.
+const USERNAME = envFile.USERNAME ?? process.env.USERNAME;
+
+// Input CSV and the two log files, all located next to this module.
+const dataFile = path.join(__dirname, "data.csv");
+const errFile = path.join(__dirname, "err.txt");
+const statusFile = path.join(__dirname, "status.txt");
+
+// Rijksmuseum collection endpoint; callers build `${API}<id>${params}`.
+const API = "https://www.rijksmuseum.nl/api/en/collection/";
+const params = `?format=json&key=${API_KEY}&culture=en`;
+
+// Wikimedia Commons API endpoint passed to Wikiapi.
+const WIKI_API = "https://commons.wikimedia.org/w/api.php";
+
+// Wikitext fragments placed on every uploaded file page.
+const author = "[[w:Rijksmuseum|Rijksmuseum]]";
+const license = `{{cc-zero|Rijksmuseum}}`;
+const LR = `{{LicenseReview}}`;
+const cats = "[[Category:Media from Rijksmuseum]]\n[[Category:Uncategorized images of the Rijksmuseum]]";
+
+module.exports = {
+  API,
+  dataFile,
+  errFile,
+  params,
+  PASSWORD,
+  USERNAME,
+  WIKI_API,
+  author,
+  license,
+  LR,
+  cats,
+  statusFile
+};
diff --git a/yarn.lock b/yarn.lock
@@ -0,0 +1,25 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+cejs@latest:
+ version "4.1.1"
+ resolved "https://registry.yarnpkg.com/cejs/-/cejs-4.1.1.tgz#c3d473cc323cb4d8ca713848a8b0c1fb8e415b39"
+ integrity sha512-knpiBF8xdoCQzh/nIS4YKJfMLbYFkZ8nY8Rt3ED44WXq9SQ2x3bEIHEDDWS+owCvkt5TGTzFAF+gzMPEQn3IMw==
+
+dotenv@^8.2.0:
+ version "8.2.0"
+ resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.2.0.tgz#97e619259ada750eea3e4ea3e26bceea5424b16a"
+ integrity sha512-8sJ78ElpbDJBHNeBzUbUVLsqKdccaa/BXF1uPTw3GrvQTBgrQrtObr2mUrE38vzYd8cEv+m/JBfDLioYcfXoaw==
+
+node-fetch@^2.6.1:
+ version "2.6.1"
+ resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052"
+ integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==
+
+wikiapi@^1.14.0:
+ version "1.14.0"
+ resolved "https://registry.yarnpkg.com/wikiapi/-/wikiapi-1.14.0.tgz#e690652f979b585639208a4a9928ad4fc438edf8"
+ integrity sha512-VPEPKGbXp1xGSOqTVFrfPAs9yv6jkmCKFG79hOYNct3SmF3WaLXK9ipp1mrNK+iwB6tHl0TouSlZVB9Qr3uZ/g==
+ dependencies:
+ cejs latest