2022-11-22 20:16:57 +02:00
import { unique } from '@joplin/lib/ArrayUtils' ;
2023-08-21 19:37:33 +02:00
import { attributesHtml , isSelfClosingTag } from '@joplin/renderer/htmlUtils' ;
2023-10-11 11:17:46 +02:00
import { Translations } from '../../utils/translation' ;
2022-11-22 20:16:57 +02:00
const Entities = require ( 'html-entities' ) . AllHtmlEntities ;
const htmlentities = new Entities ( ) . encode ;
const htmlparser2 = require ( '@joplin/fork-htmlparser2' ) ;
/**
 * Normalises the inner HTML of a translatable block before it is used as a
 * translation lookup key.
 *
 * All newlines are removed, then leading and trailing padding is stripped:
 * `&tab;` entities, non-breaking spaces and literal tab characters. The steps
 * run in this fixed order, so only runs of the same kind of padding are
 * removed at each step.
 *
 * @param content - HTML fragment accumulated for a translatable block. The
 * text nodes in it have already gone through `encodeHtml`, so a non-breaking
 * space is expected to show up as the `&nbsp;` entity rather than as a raw
 * U+00A0 character (NOTE(review): depends on the entity encoder - confirm).
 * @returns The fragment without newlines and without surrounding padding.
 */
const trimHtml = (content: string) => {
	return content
		.replace(/\n/g, '')
		.replace(/^(&tab;)+/i, '')
		// Match the entity, since the text is entity-encoded at this point. The
		// raw U+00A0 character is accepted too so unencoded input keeps working.
		.replace(/^(&nbsp;|\u00A0)+/i, '')
		.replace(/(&tab;)+$/i, '')
		.replace(/(&nbsp;|\u00A0)+$/i, '')
		.replace(/^\t+/, '')
		.replace(/\t+$/, '');
};
2023-10-11 11:17:46 +02:00
/**
 * Looks up the translation of an English string extracted from the HTML.
 *
 * Several spellings of the string are tried, because the text rebuilt from
 * the parser may differ slightly from the key stored in `translations`:
 * the string as-is, with `<br/>` or `<br />` rewritten as `<br>`, and with the
 * `&apos;` / `&quot;` entities decoded back to plain quotes.
 *
 * @param englishString - The (entity-encoded) source string to translate.
 * @param translations - Map from English string to its translated forms.
 * Only the first form is used.
 * @returns The first translation found, or `englishString` unchanged when
 * none of the candidates has a non-empty entry.
 */
const findTranslation = (englishString: string, translations: Translations): string => {
	const stringsToTry = unique([
		englishString,
		englishString.replace(/<br\/>/gi, '<br>'),
		englishString.replace(/<br \/>/gi, '<br>'),
		// Text nodes are entity-encoded before they reach this function, so
		// quotes have to be decoded to match keys written with plain quotes.
		englishString
			.replace(/&apos;/gi, '\'')
			.replace(/&quot;/gi, '"'),
	]) as string[];

	for (const stringToTry of stringsToTry) {
		// Only consider real entries: a string such as "constructor" must not
		// resolve to a member inherited from Object.prototype.
		if (!Object.prototype.hasOwnProperty.call(translations, stringToTry)) continue;

		const forms = translations[stringToTry];

		// Note that we don't currently support plural forms for the website
		if (forms && forms.length) return forms[0];
	}

	return englishString;
};
2022-11-28 18:16:32 +02:00
/**
 * Entity-encodes decoded text so that it can be written back into HTML,
 * then undoes the two encodings that must not survive in the output.
 *
 * @param decodedText - Text with HTML entities already decoded.
 * @returns The encoded text, with tab characters and Mustache partial
 * markers (`{{> name}}`) left in their literal form.
 */
const encodeHtml = (decodedText: string): string => {
	return htmlentities(decodedText)
		// Restore tabs. Both the named and the numeric spelling are handled
		// because the exact form depends on the encoder (NOTE(review): confirm
		// which one `html-entities` emits).
		.replace(/&(?:Tab|#9);/gi, '\t')
		.replace(/{{&gt; /gi, '{{> '); // Don't break Mustache partials
};
2022-11-22 20:16:57 +02:00
2023-10-11 11:17:46 +02:00
/**
 * Replaces the content of every element carrying a `translate` attribute
 * with its translation, and re-serialises the rest of the document.
 *
 * The HTML is streamed through the parser. Outside a translatable block each
 * tag and text node is written straight to the output. Inside one, the
 * content is buffered until the block's closing tag, then trimmed, looked up
 * in `translations`, and emitted after the block's opening tag.
 *
 * @param html - The HTML document to process.
 * @param _languageCode - Not used by this function.
 * @param translations - Map from English string to its translated forms.
 * @returns The re-serialised HTML; the emitted fragments are joined with
 * newlines.
 * @throws Error when a `translate` element is nested in another one.
 */
export default (html: string, _languageCode: string, translations: Translations) => {
	const output: string[] = [];

	interface State {
		// When inside a block that needs to be translated, this array
		// accumulates the opening tags. For example, this text:
		//
		//     <div translate>Hello <b>world</b></div>
		//
		// will have the tags ['div', 'b']
		//
		// This is used to track when we've processed all the content, including
		// HTML content, within a translatable block. Once that stack is empty,
		// we reached the end, and can translate the string that we got.
		translateStack: string[];

		// Keeps the serialised opening tag of the translatable block. For
		// example in:
		//
		//     <div translate>Hello <b>world</b></div>
		//
		// this holds the markup generated for the opening "div" tag.
		currentTranslationTag: string[];

		// Once we finished processing the translatable block, this will contain
		// the string to be translated. It may contain HTML.
		currentTranslationContent: string[];

		// Tells if we're at the beginning of a translatable block.
		translateIsOpening: boolean;

		// True while inside a <script> element, whose text is emitted as-is
		// instead of being entity-encoded.
		inScript: boolean;
	}

	const state: State = {
		translateStack: [],
		currentTranslationTag: [],
		currentTranslationContent: [],
		translateIsOpening: false,
		inScript: false,
	};

	// Routes a fragment either to the final output, or, when inside a
	// translatable block, to the buffer for its opening tag or its content.
	const pushContent = (state: State, content: string) => {
		if (state.translateStack.length) {
			if (state.translateIsOpening) {
				state.currentTranslationTag.push(content);
			} else {
				state.currentTranslationContent.push(content);
			}
		} else {
			output.push(content);
		}
	};

	const parser = new htmlparser2.Parser({

		// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
		onopentag: (name: string, attrs: any) => {
			if (name === 'script') state.inScript = true;

			if ('translate' in attrs) {
				if (state.translateStack.length) throw new Error(`Cannot have a translate block within another translate block. At tag "${name}" attrs: ${JSON.stringify(attrs)}`);
				state.translateStack.push(name);
				state.currentTranslationContent = [];
				state.currentTranslationTag = [];
				state.translateIsOpening = true;
			} else if (state.translateStack.length) {
				// Nested element inside a translatable block: track it so that
				// we know when the block itself gets closed.
				state.translateStack.push(name);
			}

			let attrHtml = attributesHtml(attrs);
			// Separate the attributes from the tag name
			if (attrHtml) attrHtml = ` ${attrHtml}`;
			const closingSign = isSelfClosingTag(name) ? '/>' : '>';

			pushContent(state, `<${name}${attrHtml}${closingSign}`);
			state.translateIsOpening = false;
		},

		ontext: (decodedText: string) => {
			const encodedText = state.inScript ? decodedText : encodeHtml(decodedText);
			pushContent(state, encodedText);
		},

		onclosetag: (name: string) => {
			if (state.translateStack.length) {
				state.translateStack.pop();

				// The stack is empty again: this was the closing tag of the
				// translatable block itself, so the buffered content can now
				// be translated and written out after its opening tag.
				if (!state.translateStack.length) {
					const stringToTranslate = trimHtml(state.currentTranslationContent.join(''));
					const translation = findTranslation(stringToTranslate, translations);
					output.push(state.currentTranslationTag[0]);
					output.push(translation);
				}
			}

			if (name === 'script') state.inScript = false;

			// Self-closing tags were already fully written by onopentag
			if (isSelfClosingTag(name)) return;

			pushContent(state, `</${name}>`);
		},

	}, { decodeEntities: true });

	parser.write(html);
	parser.end();

	return output.join('\n');
};