2019-09-23 14:18:30 -07:00
const stringToStream = require ( 'string-to-stream' ) ;
2020-10-21 12:02:06 +01:00
// const cleanHtml = require('clean-html');
2020-11-05 16:58:23 +00:00
const resourceUtils = require ( './resourceUtils.js' ) ;
2021-01-30 11:08:11 +00:00
const htmlUtils = require ( './htmlUtils' ) . default ;
2020-06-15 17:10:51 +01:00
const Entities = require ( 'html-entities' ) . AllHtmlEntities ;
const htmlentities = new Entities ( ) . encode ;
2019-09-23 14:18:30 -07:00
/**
 * Appends the element that represents `resource` to `lines`.
 *
 * The element is built by `resourceUtils`: `imgElement` for image MIME types,
 * `audioElement` for `audio/x-m4a`, and `attachmentElement` for everything else.
 * The `alt` text falls back, in order, to the resource title, the resource
 * filename, and finally the empty string.
 *
 * @param {Array} lines - Output buffer; the new element is pushed onto it.
 * @param {Object} resource - Resource to render. `id`, `mime`, `title` and `filename` are read.
 * @param {Object} attributes - Attributes of the originating tag. This object is not modified.
 * @returns {Array} The same `lines` array that was passed in.
 */
function addResourceTag(lines, resource, attributes) {
	// Note: refactor to use Resource.markdownTag
	const alt = attributes.alt || resource.title || resource.filename || '';

	// Work on a copy so the caller's attribute object is left untouched
	const attrs = Object.assign({}, attributes, { alt });

	// Internal resource link - it must not contain any whitespace
	const src = `:/${resource.id}`;

	if (resourceUtils.isImageMimeType(resource.mime)) {
		lines.push(resourceUtils.imgElement({ src, attributes: attrs }));
	} else if (resource.mime === 'audio/x-m4a') {
		// TODO: once https://github.com/laurent22/joplin/issues/1794 is resolved,
		// come back to this and make sure it works.
		lines.push(resourceUtils.audioElement({
			src,
			alt: attrs.alt,
			id: resource.id,
		}));
	} else {
		// TODO: figure out what other mime types can be handled more gracefully
		lines.push(resourceUtils.attachmentElement({
			src,
			attributes: attrs,
			id: resource.id,
		}));
	}

	return lines;
}
/**
 * Returns a copy of `node.attributes` in which every attribute name is lower-cased.
 *
 * Only own enumerable properties are copied. The keys are obtained with
 * `Object.keys` rather than by calling `node.attributes.hasOwnProperty`, because
 * the attribute names come from the parsed document and an attribute that is
 * itself called "hasOwnProperty" would otherwise shadow the method and throw.
 *
 * @param {Object} node - Parser node; only its `attributes` property is read.
 * @returns {Object} New object mapping lower-cased names to the original values,
 *   or an empty object when the node has no attributes.
 */
function attributeToLowerCase(node) {
	if (!node.attributes) return {};

	const output = {};
	for (const name of Object.keys(node.attributes)) {
		output[name.toLowerCase()] = node.attributes[name];
	}
	return output;
}
function enexXmlToHtml _ ( stream , resources ) {
2020-03-13 23:46:14 +00:00
const remainingResources = resources . slice ( ) ;
2019-09-23 14:18:30 -07:00
2020-05-21 09:14:33 +01:00
const removeRemainingResource = id => {
2019-09-23 14:18:30 -07:00
for ( let i = 0 ; i < remainingResources . length ; i ++ ) {
const r = remainingResources [ i ] ;
if ( r . id === id ) {
remainingResources . splice ( i , 1 ) ;
}
}
} ;
2020-06-15 17:10:51 +01:00
return new Promise ( ( resolve ) => {
2019-09-23 14:18:30 -07:00
const options = { } ;
const strict = false ;
2020-11-07 15:59:37 +00:00
const saxStream = require ( '@joplin/fork-sax' ) . createStream ( strict , options ) ;
2019-09-23 14:18:30 -07:00
2020-03-13 23:46:14 +00:00
const section = {
2019-09-23 14:18:30 -07:00
type : 'text' ,
lines : [ ] ,
parent : null ,
} ;
saxStream . on ( 'error' , function ( e ) {
console . warn ( e ) ;
} ) ;
saxStream . on ( 'text' , function ( text ) {
2020-06-15 17:10:51 +01:00
section . lines . push ( htmlentities ( text ) ) ;
2019-09-23 14:18:30 -07:00
} ) ;
saxStream . on ( 'opentag' , function ( node ) {
const tagName = node . name . toLowerCase ( ) ;
const attributesStr = resourceUtils . attributesToStr ( node . attributes ) ;
if ( tagName === 'en-media' ) {
const nodeAttributes = attributeToLowerCase ( node ) ;
const hash = nodeAttributes . hash ;
let resource = null ;
for ( let i = 0 ; i < resources . length ; i ++ ) {
2020-03-13 23:46:14 +00:00
const r = resources [ i ] ;
2022-07-23 09:31:32 +02:00
if ( r . id === hash ) {
2019-09-23 14:18:30 -07:00
resource = r ;
removeRemainingResource ( r . id ) ;
break ;
}
}
if ( ! resource ) {
// TODO: Extract this duplicate of code in ./import-enex-md-gen.js
let found = false ;
for ( let i = 0 ; i < remainingResources . length ; i ++ ) {
2020-03-13 23:46:14 +00:00
const r = remainingResources [ i ] ;
2019-09-23 14:18:30 -07:00
if ( ! r . id ) {
resource = Object . assign ( { } , r ) ;
resource . id = hash ;
remainingResources . splice ( i , 1 ) ;
found = true ;
break ;
}
}
if ( ! found ) {
2020-06-15 17:10:51 +01:00
// console.warn(`Hash with no associated resource: ${hash}`);
2019-09-23 14:18:30 -07:00
}
}
// If the resource does not appear among the note's resources, it
// means it's an attachement. It will be appended along with the
// other remaining resources at the bottom of the markdown text.
if ( resource && ! ! resource . id ) {
section . lines = addResourceTag ( section . lines , resource , nodeAttributes ) ;
}
2022-07-23 09:31:32 +02:00
} else if ( tagName === 'en-todo' ) {
2020-06-28 18:55:47 +01:00
const nodeAttributes = attributeToLowerCase ( node ) ;
2022-07-23 09:31:32 +02:00
const checkedHtml = nodeAttributes . checked && nodeAttributes . checked . toLowerCase ( ) === 'true' ? ' checked="checked" ' : ' ' ;
2020-06-28 18:55:47 +01:00
section . lines . push ( ` <input ${ checkedHtml } type="checkbox" onclick="return false;" /> ` ) ;
2021-01-30 11:08:11 +00:00
} else if ( htmlUtils . isSelfClosingTag ( tagName ) ) {
2020-06-15 17:10:51 +01:00
section . lines . push ( ` < ${ tagName } ${ attributesStr } /> ` ) ;
2019-09-23 14:18:30 -07:00
} else {
2020-06-15 17:10:51 +01:00
section . lines . push ( ` < ${ tagName } ${ attributesStr } > ` ) ;
2019-09-23 14:18:30 -07:00
}
} ) ;
2020-06-15 17:10:51 +01:00
saxStream . on ( 'closetag' , function ( node ) {
const tagName = node ? node . toLowerCase ( ) : node ;
2021-01-30 11:08:11 +00:00
if ( ! htmlUtils . isSelfClosingTag ( tagName ) ) section . lines . push ( ` </ ${ tagName } > ` ) ;
2019-09-23 14:18:30 -07:00
} ) ;
saxStream . on ( 'attribute' , function ( ) { } ) ;
saxStream . on ( 'end' , function ( ) {
resolve ( {
content : section ,
resources : remainingResources ,
} ) ;
} ) ;
stream . pipe ( saxStream ) ;
} ) ;
}
/**
 * Converts the markup of an ENEX note to an HTML string.
 *
 * The string is wrapped in a stream with `stringToStream`, converted by
 * `enexXmlToHtml_`, and the resulting fragments are joined and passed through
 * `beautifyHtml`.
 *
 * @param {string} xmlString - Note markup to convert.
 * @param {Array<Object>} resources - Resources attached to the note.
 * @param {Object} [options={}] - Forwarded to `enexXmlToHtml_`, which declares only
 *   two parameters, so it currently has no effect.
 * @returns {Promise<string>} The generated HTML.
 */
async function enexXmlToHtml(xmlString, resources, options = {}) {
	const stream = stringToStream(xmlString);
	const result = await enexXmlToHtml_(stream, resources, options);

	const preCleaning = result.content.lines.join('');
	const final = await beautifyHtml(preCleaning);
	return final.join('');
}
/**
 * Placeholder for HTML clean-up: currently returns the input unchanged.
 *
 * The clean-html package doesn't appear to be robust enough to deal with the
 * crazy HTML that Evernote can generate. In the best case scenario it will throw
 * an error but in some cases it will go into an infinite loop, so for that
 * reason it is disabled.
 *
 * Fixed https://github.com/laurent22/joplin/issues/3958
 *
 * @param {string} html - HTML to clean.
 * @returns {string[]} Single-element array holding `html` as-is. The array shape is
 *   kept because the caller joins the returned pieces.
 */
const beautifyHtml = (html) => {
	return [html];
};
2020-02-04 22:09:34 +00:00
module . exports = { enexXmlToHtml } ;