From f726999af2714f9ed69d4f60ad36aec9e089af40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Mond=C3=A9jar?= Date: Sat, 2 Oct 2021 16:46:07 +0200 Subject: [PATCH] Unify SVGs for HTML safe titles reporting encoding errors in linting (#6522) * Basic implementation * Fix error * Update icons * Tweak implementation * Minor changes * Apply suggested changes * Don't encode 34 and 39 codepoints (double and single quotes) * Fix comments --- .svglintrc.js | 163 +++++++++++++++++++++++++++++- icons/aeromexico.svg | 2 +- icons/allocine.svg | 2 +- icons/citroen.svg | 2 +- icons/dassaultsystemes.svg | 2 +- icons/letsencrypt.svg | 2 +- icons/macys.svg | 2 +- icons/metrodelaciudaddemexico.svg | 2 +- icons/metrodeparis.svg | 2 +- icons/pokemon.svg | 2 +- icons/rte.svg | 2 +- icons/saopaulometro.svg | 2 +- icons/skoda.svg | 2 +- icons/tado.svg | 2 +- package.json | 1 + scripts/utils.js | 10 +- 16 files changed, 179 insertions(+), 21 deletions(-) diff --git a/.svglintrc.js b/.svglintrc.js index e3cca01e3..45b1be49b 100644 --- a/.svglintrc.js +++ b/.svglintrc.js @@ -2,6 +2,7 @@ const fs = require('fs'); const data = require("./_data/simple-icons.json"); const { htmlFriendlyToTitle } = require("./scripts/utils.js"); +const htmlNamedEntities = require("named-html-entities-json"); const svgpath = require("svgpath"); const svgPathBbox = require("svg-path-bbox"); const parsePath = require("svg-path-segments"); @@ -69,6 +70,29 @@ function getPathDIndex(svgFileContent) { return svgFileContent.indexOf(pathDStart) + pathDStart.length; } +/** + * Get the index at which the text of the first `` tag starts. + * @param svgFileContent The raw SVG as text. + **/ +function getTitleTextIndex(svgFileContent) { + const titleStart = ''; + return svgFileContent.indexOf(titleStart) + titleStart.length; +} + +/** + * Convert a hexadecimal number passed as string to decimal number as integer. + * @param hex The hexadecimal number representation to convert. + **/ +function hexadecimalToDecimal(hex) { + let result = 0, digitValue; + hex = hex.toLowerCase(); + for (var i = 0; i < hex.length; i++) { + digitValue = '0123456789abcdefgh'.indexOf(hex[i]); + result = result * 16 + digitValue; + } + return result; +} + if (updateIgnoreFile) { process.on('exit', () => { // ensure object output order is consistent due to async svglint processing @@ -130,11 +154,140 @@ module.exports = { function(reporter, $, ast) { reporter.name = "icon-title"; - const iconTitleText = $.find("title").text(); - const iconName = htmlFriendlyToTitle(iconTitleText); - const iconExists = data.icons.some(icon => icon.title === iconName); - if (!iconExists) { - reporter.error(`No icon with title "${iconName}" found in simple-icons.json`); + const iconTitleText = $.find("title").text(), + xmlNamedEntitiesCodepoints = [38, 60, 62], + xmlNamedEntities = ["amp", "lt", "gt"]; + let _validCodepointsRepr = true; + + // avoid character codepoints as hexadecimal representation + const hexadecimalCodepoints = Array.from( + iconTitleText.matchAll(/&#x([A-Fa-f0-9]+);/g) + ); + if (hexadecimalCodepoints.length > 0) { + _validCodepointsRepr = false; + + hexadecimalCodepoints.forEach(match => { + const charHexReprIndex = getTitleTextIndex(ast.source) + match.index + 1; + const charDec = hexadecimalToDecimal(match[1]); + + let charRepr; + if (xmlNamedEntitiesCodepoints.includes(charDec)) { + charRepr = `&${xmlNamedEntities[xmlNamedEntitiesCodepoints.indexOf(charDec)]};`; + } else if (charDec < 128) { + charRepr = String.fromCodePoint(charDec); + } else { + charRepr = `&#${charDec};`; + } + + reporter.error( + `Hexadecimal representation of encoded character "${match[0]}" found at index ${charHexReprIndex}:` + + ` replace it with "${charRepr}".` + ); + }) + } + + // avoid character codepoints as named entities + const namedEntitiesCodepoints = Array.from( + iconTitleText.matchAll(/&([A-Za-z0-9]+);/g) + ); + if (namedEntitiesCodepoints.length > 0) { + namedEntitiesCodepoints.forEach(match => { + const namedEntiyReprIndex = getTitleTextIndex(ast.source) + match.index + 1; + + if (!xmlNamedEntities.includes(match[1].toLowerCase())) { + _validCodepointsRepr = false; + const namedEntityJsRepr = htmlNamedEntities[match[1]]; + let replacement; + + if (namedEntityJsRepr === undefined || namedEntityJsRepr.length != 1) { + replacement = 'its decimal or literal representation'; + } else { + const namedEntityDec = namedEntityJsRepr.codePointAt(0); + if (namedEntityDec < 128) { + replacement = `"${namedEntityJsRepr}"`; + } else { + replacement = `"&#${namedEntityDec};"`; + } + } + + reporter.error( + `Named entity representation of encoded character "${match[0]}" found at index ${namedEntiyReprIndex}.` + + ` Replace it with ${replacement}.` + ); + } + }) + } + + if (_validCodepointsRepr) { + // compare encoded title with original title and report error if not equal + const encodingMatches = Array.from(iconTitleText.matchAll(/&(#([0-9]+)|(amp|quot|lt|gt));/g)), + encodedBuf = []; + + const _indexesToIgnore = []; + for (let m = 0; m < encodingMatches.length; m++) { + let index = encodingMatches[m].index; + for (let r = index; r < index + encodingMatches[m][0].length; r++) { + _indexesToIgnore.push(r) + } + } + + for (let i = iconTitleText.length - 1; i >= 0; i--) { + if (_indexesToIgnore.includes(i)) { + encodedBuf.unshift(iconTitleText[i]); + } else { + // encode all non ascii characters plus "'&<> (XML named entities) + let charDecimalCode = iconTitleText.charCodeAt(i); + + if (charDecimalCode > 127) { + encodedBuf.unshift(`&#${charDecimalCode};`); + } else if (xmlNamedEntitiesCodepoints.includes(charDecimalCode)) { + encodedBuf.unshift( + `&${xmlNamedEntities[xmlNamedEntitiesCodepoints.indexOf(charDecimalCode)]};` + ); + } else { + encodedBuf.unshift(iconTitleText[i]); + } + } + } + const encodedIconTitleText = encodedBuf.join(''); + if (encodedIconTitleText !== iconTitleText) { + _validCodepointsRepr = false; + + reporter.error( + `Unencoded unicode characters found in title "${iconTitleText}":` + + ` rewrite it as "${encodedIconTitleText}".` + ); + } + + // check if there are some other encoded characters in decimal notation + // which shouldn't be encoded + encodingMatches.filter(m => !isNaN(m[2])).forEach(match => { + const decimalNumber = parseInt(match[2]); + if (decimalNumber < 128) { + _validCodepointsRepr = false; + + const decimalCodepointCharIndex = getTitleTextIndex(ast.source) + match.index + 1; + if (xmlNamedEntitiesCodepoints.includes(decimalNumber)) { + replacement = `"&${xmlNamedEntities[xmlNamedEntitiesCodepoints.indexOf(decimalNumber)]};"`; + } else { + replacement = String.fromCharCode(decimalNumber); + replacement = replacement == '"' ? `'"'` : `"${replacement}"`; + } + + reporter.error( + `Unnecessary encoded character "${match[0]}" found at index ${decimalCodepointCharIndex}:` + + ` replace it with ${replacement}.` + ); + } + }); + + if (_validCodepointsRepr) { + const iconName = htmlFriendlyToTitle(iconTitleText); + const iconExists = data.icons.some(icon => icon.title === iconName); + if (!iconExists) { + reporter.error(`No icon with title "${iconName}" found in simple-icons.json`); + } + } } }, function(reporter, $, ast) { diff --git a/icons/aeromexico.svg b/icons/aeromexico.svg index f7d77e3e0..a27f71996 100644 --- a/icons/aeromexico.svg +++ b/icons/aeromexico.svg @@ -1 +1 @@ -<svg role="img" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><title>Aeroméxico \ No newline at end of file +Aeroméxico \ No newline at end of file diff --git a/icons/allocine.svg b/icons/allocine.svg index 4a53441c5..f55e43550 100644 --- a/icons/allocine.svg +++ b/icons/allocine.svg @@ -1 +1 @@ -AlloCiné \ No newline at end of file +AlloCiné \ No newline at end of file diff --git a/icons/citroen.svg b/icons/citroen.svg index 9794fcc56..128db1771 100644 --- a/icons/citroen.svg +++ b/icons/citroen.svg @@ -1 +1 @@ -Citroën \ No newline at end of file +Citroën \ No newline at end of file diff --git a/icons/dassaultsystemes.svg b/icons/dassaultsystemes.svg index 0a6e8cbba..14f4f654f 100644 --- a/icons/dassaultsystemes.svg +++ b/icons/dassaultsystemes.svg @@ -1 +1 @@ -Dassault Systèmes \ No newline at end of file +Dassault Systèmes \ No newline at end of file diff --git a/icons/letsencrypt.svg b/icons/letsencrypt.svg index c64bb5019..789898119 100644 --- a/icons/letsencrypt.svg +++ b/icons/letsencrypt.svg @@ -1 +1 @@ -Let's Encrypt \ No newline at end of file +Let’s Encrypt \ No newline at end of file diff --git a/icons/macys.svg b/icons/macys.svg index 6f1e32153..d336959bd 100644 --- a/icons/macys.svg +++ b/icons/macys.svg @@ -1 +1 @@ -Macy’s \ No newline at end of file +Macy’s \ No newline at end of file diff --git a/icons/metrodelaciudaddemexico.svg b/icons/metrodelaciudaddemexico.svg index 21b0fefda..f1af123fd 100644 --- a/icons/metrodelaciudaddemexico.svg +++ b/icons/metrodelaciudaddemexico.svg @@ -1 +1 @@ -Metro de la Ciudad de México \ No newline at end of file +Metro de la Ciudad de México \ No newline at end of file diff --git a/icons/metrodeparis.svg b/icons/metrodeparis.svg index 0b4e39cd9..09081bb47 100644 --- a/icons/metrodeparis.svg +++ b/icons/metrodeparis.svg @@ -1 +1 @@ -Métro de Paris \ No newline at end of file +Métro de Paris \ No newline at end of file diff --git a/icons/pokemon.svg b/icons/pokemon.svg index c2849175f..d19a290c7 100644 --- a/icons/pokemon.svg +++ b/icons/pokemon.svg @@ -1 +1 @@ -Pokémon \ No newline at end of file +Pokémon \ No newline at end of file diff --git a/icons/rte.svg b/icons/rte.svg index 1eb1922e5..9180bfbbc 100644 --- a/icons/rte.svg +++ b/icons/rte.svg @@ -1 +1 @@ -RTÉ \ No newline at end of file +RTÉ \ No newline at end of file diff --git a/icons/saopaulometro.svg b/icons/saopaulometro.svg index cea0c4659..12d154cdb 100644 --- a/icons/saopaulometro.svg +++ b/icons/saopaulometro.svg @@ -1 +1 @@ -São Paulo Metro \ No newline at end of file +São Paulo Metro \ No newline at end of file diff --git a/icons/skoda.svg b/icons/skoda.svg index ccbf36e60..6b36aa18c 100644 --- a/icons/skoda.svg +++ b/icons/skoda.svg @@ -1 +1 @@ -ŠKODA \ No newline at end of file +ŠKODA \ No newline at end of file diff --git a/icons/tado.svg b/icons/tado.svg index f71561cf7..90577e94e 100644 --- a/icons/tado.svg +++ b/icons/tado.svg @@ -1 +1 @@ -tado° \ No newline at end of file +tado° \ No newline at end of file diff --git a/package.json b/package.json index 2cb12a4ff..915a96f3d 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "jest": "27.2.2", "jest-diff": "27.2.2", "jsonschema": "1.4.0", + "named-html-entities-json": "0.1.0", "npm-run-all": "4.1.5", "rimraf": "3.0.2", "svg-path-bbox": "1.0.1", diff --git a/scripts/utils.js b/scripts/utils.js index b7eb0da32..6e918cefd 100644 --- a/scripts/utils.js +++ b/scripts/utils.js @@ -37,8 +37,12 @@ module.exports = { * @param {String} htmlFriendlyTitle The title to convert */ htmlFriendlyToTitle: htmlFriendlyTitle => ( - htmlFriendlyTitle - .replace(/'/g, "’") - .replace(/&/g, "&") + htmlFriendlyTitle.replace( + /&#([0-9]+);/g, + (_, num) => String.fromCharCode(parseInt(num)) + ).replace( + /&(quot|amp|lt|gt);/g, + (_, ref) => ({quot: '"', amp: '&', lt: '<', gt: '>'}[ref]) + ) ), }