You've already forked joplin
							
							
				mirror of
				https://github.com/laurent22/joplin.git
				synced 2025-10-31 00:07:48 +02:00 
			
		
		
		
	Desktop, Cli: Fixes #4965: Improved importing Evernote notes that contain codeblocks
This commit is contained in:
		| @@ -5,7 +5,7 @@ | ||||
|   "author": "Laurent Cozic", | ||||
|   "private": true, | ||||
|   "scripts": { | ||||
|     "test": "jest --config=jest.config.js --bail --forceExit", | ||||
|     "test": "jest --verbose=false --config=jest.config.js --bail --forceExit", | ||||
|     "test-one": "jest --verbose=false --config=jest.config.js --bail --forceExit", | ||||
|     "test-ci": "jest --config=jest.config.js --forceExit", | ||||
|     "build": "gulp build", | ||||
|   | ||||
| @@ -1,14 +1,16 @@ | ||||
| For example, consider a web page like this: | ||||
|  | ||||
| 	<!DOCTYPE html> | ||||
| 	<html> | ||||
| 	  <head> | ||||
| 	    <meta http-equiv="content-type" content="text/html; charset=utf-8" /> | ||||
| 	  </head> | ||||
| ``` | ||||
| <!DOCTYPE html> | ||||
| <html> | ||||
|   <head> | ||||
|     <meta http-equiv="content-type" content="text/html; charset=utf-8" /> | ||||
|   </head> | ||||
|  | ||||
| 	  <body> | ||||
| 	    <script src="page-scripts/page-script.js"></script> | ||||
| 	  </body> | ||||
| 	</html> | ||||
|   <body> | ||||
|     <script src="page-scripts/page-script.js"></script> | ||||
|   </body> | ||||
| </html> | ||||
| ``` | ||||
|  | ||||
| The script "page-script.js" does this: | ||||
| @@ -1,7 +1,9 @@ | ||||
| Subshell: | ||||
|  | ||||
| 	( | ||||
| 	    set -e | ||||
| 	    false | ||||
| 	    echo Unreachable | ||||
| 	) && echo Great success | ||||
| ``` | ||||
| ( | ||||
|     set -e | ||||
|     false | ||||
|     echo Unreachable | ||||
| ) && echo Great success | ||||
| ``` | ||||
| @@ -1 +1 @@ | ||||
| 	jq -r '.[]|[.index, .name, .section, .award, .industry]|join("\t")' raw.json |pbcopy | ||||
| `jq -r '.[]|[.index, .name, .section, .award, .industry]|join("\t")' raw.json |pbcopy` | ||||
							
								
								
									
										1
									
								
								packages/app-cli/tests/enex_to_md/code4.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								packages/app-cli/tests/enex_to_md/code4.html
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| <div><div>code block:</div><div><br/></div><div style="box-sizing: border-box; padding: 8px; font-family: Monaco, Menlo, Consolas, 'Courier New', monospace; font-size: 12px; color: rgb(51, 51, 51); border-radius: 4px; background-color: rgb(251, 250, 248); border: 1px solid rgba(0, 0, 0, 0.15);-en-codeblock:true;"><div>public static void main(String[] args) {</div><div><span>    System.out.println('Hello World');</span><br/></div><div>}</div></div><div><br/></div><div>end of code block</div></div> | ||||
							
								
								
									
										9
									
								
								packages/app-cli/tests/enex_to_md/code4.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								packages/app-cli/tests/enex_to_md/code4.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| code block: | ||||
|  | ||||
| ``` | ||||
| public static void main(String[] args) { | ||||
|     System.out.println('Hello World'); | ||||
| } | ||||
| ``` | ||||
|  | ||||
| end of code block | ||||
| @@ -31,6 +31,7 @@ interface Section { | ||||
/** Describes one entry on the parser's stack of currently open elements. */
interface ParserStateTag {
	/** Element name, converted to lower case. */
	name: string;
	/** False when the element was detected as an invisible block. */
	visible: boolean;
	/** True when the element was detected as the start of a code block. */
	isCodeBlock: boolean;
}
|  | ||||
| interface ParserStateList { | ||||
| @@ -443,6 +444,20 @@ function isInvisibleBlock(context: any, attributes: any) { | ||||
| 	return display && display.indexOf('none') === 0; | ||||
| } | ||||
|  | ||||
/**
 * Returns a copy of `lines` with every leading and trailing block marker
 * (`BLOCK_OPEN`, `BLOCK_CLOSE`) and empty string removed. Markers and empty
 * strings located between two content lines are kept, and the input array
 * is never modified.
 *
 * @param lines - Lines to trim.
 * @returns A new array holding the span between the first and the last
 * content line; empty when `lines` has no content line at all.
 */
function trimBlockOpenAndClose(lines: string[]): string[] {
	// Built once per call rather than once per loop iteration.
	const trimmable = [BLOCK_OPEN, BLOCK_CLOSE, ''];

	// Find both edges first and copy once: dropping leading entries one at
	// a time with splice() re-shifts the whole array on every removal.
	let start = 0;
	while (start < lines.length && trimmable.includes(lines[start])) start++;

	let end = lines.length;
	while (end > start && trimmable.includes(lines[end - 1])) end--;

	return lines.slice(start, end);
}
|  | ||||
| function isSpanWithStyle(attributes: any) { | ||||
| 	if (attributes != undefined) { | ||||
| 		if ('style' in attributes) { | ||||
| @@ -484,6 +499,16 @@ function displaySaxWarning(context: any, message: string) { | ||||
| 	console.warn(line.join(': ')); | ||||
| } | ||||
|  | ||||
/**
 * Tells whether an element must be handled as a code block. This is the
 * case for any `code` element, and for any element whose `style` attribute
 * has an `-en-codeblock` value equal to `true` (compared case-insensitively).
 *
 * @param context - Passed through unchanged to `cssValue`.
 * @param nodeName - Element name; compared as-is against `'code'`.
 * @param attributes - Element attributes; may be null or undefined.
 * @returns True when the element is a code block, false otherwise.
 */
function isCodeBlock(context: any, nodeName: string, attributes: any) {
	if (nodeName === 'code') return true;

	// Without a style attribute there is nothing else to look at.
	const style = attributes ? attributes.style : null;
	if (!style) return false;

	const flag = cssValue(context, style, '-en-codeblock');
	return !!flag && flag.toLowerCase() === 'true';
}
|  | ||||
| // function removeSectionParent(section:Section | string) { | ||||
| // 	if (typeof section === 'string') return section; | ||||
|  | ||||
| @@ -575,11 +600,13 @@ function enexXmlToMdArray(stream: any, resources: ResourceEntity[]): Promise<Ene | ||||
| 			const nodeAttributes = attributeToLowerCase(node); | ||||
| 			const n = node.name.toLowerCase(); | ||||
| 			const isVisible = !isInvisibleBlock(this, nodeAttributes); | ||||
|  | ||||
| 			state.tags.push({ | ||||
| 			const tagInfo: ParserStateTag = { | ||||
| 				name: n, | ||||
| 				visible: isVisible, | ||||
| 			}); | ||||
| 				isCodeBlock: isCodeBlock(this, n, nodeAttributes), | ||||
| 			}; | ||||
|  | ||||
| 			state.tags.push(tagInfo); | ||||
|  | ||||
| 			const currentList = state.lists && state.lists.length ? state.lists[state.lists.length - 1] : null; | ||||
|  | ||||
| @@ -673,6 +700,25 @@ function enexXmlToMdArray(stream: any, resources: ResourceEntity[]): Promise<Ene | ||||
| 					lines: [], | ||||
| 					parent: section, | ||||
| 				}; | ||||
| 				section.lines.push(newSection); | ||||
| 				section = newSection; | ||||
| 			} else if (tagInfo.isCodeBlock) { | ||||
| 				// state.inPre = false; | ||||
|  | ||||
| 				// const previousIsPre = state.tags.length ? state.tags[state.tags.length - 1].name === 'pre' : false; | ||||
| 				// if (previousIsPre) { | ||||
| 				// 	section.lines.pop(); | ||||
| 				// } | ||||
|  | ||||
| 				state.inCode.push(true); | ||||
| 				state.currentCode = ''; | ||||
|  | ||||
| 				const newSection: Section = { | ||||
| 					type: SectionType.Code, | ||||
| 					lines: [], | ||||
| 					parent: section, | ||||
| 				}; | ||||
|  | ||||
| 				section.lines.push(newSection); | ||||
| 				section = newSection; | ||||
| 			} else if (isBlockTag(n)) { | ||||
| @@ -750,18 +796,6 @@ function enexXmlToMdArray(stream: any, resources: ResourceEntity[]): Promise<Ene | ||||
| 			} else if (n == 'blockquote') { | ||||
| 				section.lines.push(BLOCK_OPEN); | ||||
| 				state.inQuote = true; | ||||
| 			} else if (n === 'code') { | ||||
| 				state.inCode.push(true); | ||||
| 				state.currentCode = ''; | ||||
|  | ||||
| 				const newSection: Section = { | ||||
| 					type: SectionType.Code, | ||||
| 					lines: [], | ||||
| 					parent: section, | ||||
| 				}; | ||||
|  | ||||
| 				section.lines.push(newSection); | ||||
| 				section = newSection; | ||||
| 			} else if (n === 'pre') { | ||||
| 				section.lines.push(BLOCK_OPEN); | ||||
| 				state.inPre = true; | ||||
| @@ -871,6 +905,28 @@ function enexXmlToMdArray(stream: any, resources: ResourceEntity[]): Promise<Ene | ||||
| 				// End of note | ||||
| 			} else if (!poppedTag.visible) { | ||||
| 				if (section && section.parent) section = section.parent; | ||||
| 			} else if (poppedTag.isCodeBlock) { | ||||
| 				state.inCode.pop(); | ||||
|  | ||||
| 				if (!state.inCode.length) { | ||||
| 					// When a codeblock is wrapped in <pre><code>, it will have | ||||
| 					// extra empty lines added by the "pre" logic, but since we | ||||
| 					// are in a codeblock we should actually trim those. | ||||
| 					const codeLines = trimBlockOpenAndClose(processMdArrayNewLines(section.lines).split('\n')); | ||||
| 					section.lines = []; | ||||
| 					if (codeLines.length > 1) { | ||||
| 						section.lines.push('\n\n```\n'); | ||||
| 						for (let i = 0; i < codeLines.length; i++) { | ||||
| 							if (i > 0) section.lines.push('\n'); | ||||
| 							section.lines.push(codeLines[i]); | ||||
| 						} | ||||
| 						section.lines.push('\n```\n\n'); | ||||
| 					} else { | ||||
| 						section.lines.push(`\`${markdownUtils.escapeInlineCode(codeLines.join(''))}\``); | ||||
| 					} | ||||
|  | ||||
| 					if (section && section.parent) section = section.parent; | ||||
| 				} | ||||
| 			} else if (isNewLineOnlyEndTag(n)) { | ||||
| 				section.lines.push(BLOCK_CLOSE); | ||||
| 			} else if (n == 'td' || n == 'th') { | ||||
| @@ -897,23 +953,6 @@ function enexXmlToMdArray(stream: any, resources: ResourceEntity[]): Promise<Ene | ||||
| 			} else if (n == 'blockquote') { | ||||
| 				section.lines.push(BLOCK_OPEN); | ||||
| 				state.inQuote = false; | ||||
| 			} else if (n === 'code') { | ||||
| 				state.inCode.pop(); | ||||
|  | ||||
| 				if (!state.inCode.length) { | ||||
| 					const codeLines = processMdArrayNewLines(section.lines).split('\n'); | ||||
| 					section.lines = []; | ||||
| 					if (codeLines.length > 1) { | ||||
| 						for (let i = 0; i < codeLines.length; i++) { | ||||
| 							if (i > 0) section.lines.push('\n'); | ||||
| 							section.lines.push(`\t${codeLines[i]}`); | ||||
| 						} | ||||
| 					} else { | ||||
| 						section.lines.push(`\`${codeLines.join('')}\``); | ||||
| 					} | ||||
|  | ||||
| 					if (section && section.parent) section = section.parent; | ||||
| 				} | ||||
| 			} else if (n === 'pre') { | ||||
| 				state.inPre = false; | ||||
| 				section.lines.push(BLOCK_CLOSE); | ||||
|   | ||||
| @@ -41,6 +41,11 @@ const markdownUtils = { | ||||
| 		return text; | ||||
| 	}, | ||||
|  | ||||
| 	escapeInlineCode(text: string): string { | ||||
| 		// https://github.com/github/markup/issues/363#issuecomment-55499909 | ||||
| 		return text.replace(/`/g, '``'); | ||||
| 	}, | ||||
|  | ||||
| 	unescapeLinkUrl(url: string) { | ||||
| 		url = url.replace(/%28/g, '('); | ||||
| 		url = url.replace(/%29/g, ')'); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user