rijks-uploader

Upload Public Domain files from Rijksmuseum.nl to Wikimedia Commons
git clone http://git.hanabi.in/repos/rijks-uploader.git
Log | Files | Refs | README | LICENSE

helperFns.js (3842B)


      1 const fetch = require("node-fetch");
      2 const Wikiapi = require("wikiapi");
      3 
      4 const { promisify } = require("util");
      5 const fs = require("fs");
      6 
      7 const readFile = promisify(fs.readFile);
      8 const appendFile = promisify(fs.appendFile);
      9 
     10 const {
     11   author,
     12   cats,
     13   errFile,
     14   license,
     15   LR,
     16   params,
     17   statusFile,
     18   API,
     19   PASSWORD,
     20   USERNAME,
     21   WIKI_API,
     22 } = require("./utils.js");
     23 
     24 function getDate(dating) {
     25   const yearEarly = dating?.yearEarly || "";
     26   const yearLate = dating?.yearLate || "";
     27   return `{{other date|~|${yearEarly}|${yearLate}}}`;
     28 }
     29 
     30 function updateContent(pageContent = '', moreCats = []) {
     31   const moreCatsStr = moreCats.reduce((acc, cur) => acc + `[[Category:Uncategorised images of the Rijksmuseum (${cur})]]\n`, '');
     32   const finalCats = `${cats}\n${moreCatsStr}`;
     33   return (
     34     pageContent + `\n== {{int:license-header}} ==\n${license}\n${LR}\n\n${finalCats}`
     35   );
     36 }
     37 
     38 function getDesc(artObject, collectionID) {
     39   return (
     40     artObject?.description || `Collection ${collectionID} of the Rijksmuseum`
     41   );
     42 }
     43 
     44 function getTitle(artObject, collectionID) {
     45   let title = artObject?.longTitle || artObject?.title;
     46   if (title) title += " ";
     47   title += `${collectionID} - Rijksmuseum`;
     48   title = title.replace(/\[/g, '').replace(/\]/g, '');
     49   return title;
     50 }
     51 
     52 function getUploadObj(media_url, filename, date, description, source) {
     53   return {
     54     media_url,
     55     comment: "uploaded using API",
     56     filename,
     57     text: {
     58       author,
     59       date,
     60       description,
     61       source,
     62     },
     63     // ignorewarnings: 1,
     64   };
     65 }
     66 
     67 async function extractIdAndUpload(line) {
     68   let collectionID;
     69   try {
     70     const [colID, source, ...rest] = line.split(",");
     71     collectionID = stripBOM(colID);
     72     const RijksAPIURL = `${API}${collectionID}${params}`;
     73     const rijksAPIresult = await fetch(RijksAPIURL);
     74     const jsonRijksAPIresult = await rijksAPIresult.json();
     75     const { artObject } = jsonRijksAPIresult;
     76     if (!artObject) {
     77       const content = `${collectionID},WRONG_RIJKS_API_RES\n`;
     78       throw new Error(content);
     79     }
     80     const copyrightStatus = artObject?.copyrightHolder;
     81     const { hasImage, webImage } = artObject;
     82     const media_url = webImage?.url;
     83     if (copyrightStatus || !hasImage || !webImage || !media_url) {
     84       const content = `${collectionID},COPYRIGHTED or MISSING IMAGE\n`;
     85       throw new Error(content);
     86     }
     87     const description = getDesc(artObject, collectionID);
     88     const title = getTitle(artObject, collectionID);
     89     const date = getDate(artObject?.dating);
     90     const media = await fetch(media_url);
     91     const fileExt = '.' + media.headers?.get('content-type')?.split('/')[1];
     92     if (!fileExt) {
     93       const content = `${collectionID},MISSING EXT\n`;
     94       throw new Error(content);
     95     }
     96     const filename = title + fileExt;
     97     const wiki = new Wikiapi(WIKI_API);
     98     await wiki.login(USERNAME, PASSWORD);
     99     const uploadObj = getUploadObj(
    100       media_url,
    101       filename,
    102       date,
    103       description,
    104       source
    105     );
    106     await wiki.upload(uploadObj);
    107     const fileTitle = "File:" + filename;
    108     const pageData = await wiki.page(fileTitle);
    109     const pageContent = pageData?.wikitext;
    110     const moreCats = artObject?.objectCollection;
    111     const updatedContent = updateContent(pageContent, moreCats);
    112     await wiki.edit_page(fileTitle, updatedContent);
    113     const content = `${collectionID},DONE\n`;
    114     await appendFile(statusFile, content);
    115   } catch (err) {
    116     const errStr = err.toString();
    117     let content = `${collectionID},${errStr}`;
    118     if(!content.endsWith('\n')) content+="\n";
    119     await appendFile(errFile, content);
    120   }
    121 }
    122 
    123 function stripBOM(string = '') {
    124   return (string.charCodeAt(0) === 0xFEFF) ? string.slice(1) : string;
    125 }
    126 
    127 module.exports = {
    128   appendFile,
    129   extractIdAndUpload,
    130   getDate,
    131   getDesc,
    132   getTitle,
    133   getUploadObj,
    134   readFile,
    135   updateContent,
    136 };