1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-24 10:27:10 +02:00

Importing Evernote notes

This commit is contained in:
Laurent Cozic 2016-12-22 22:35:35 +01:00
parent 9d1e62a872
commit 6c3c9d8a83
10 changed files with 398 additions and 71 deletions

5
.gitignore vendored
View File

@ -20,4 +20,7 @@ QtClient/build-*-Debug/
*.pro.user
notes*.sqlite
Makefile
Makefile.*
Makefile.*
TODO.md
tests/generated
QtClient/JoplinQtClient/make.bat

View File

@ -8,7 +8,7 @@
using namespace jop;
Application::Application(int &argc, char **argv) : QGuiApplication(argc, argv) {
db_ = Database("D:/Web/www/joplin/notes.sqlite");
db_ = Database("D:/Web/www/joplin/QtClient/data/notes.sqlite");
folderService_ = FolderService(db_);
folderModel_.setService(folderService_);

View File

@ -1,13 +1,12 @@
CREATE TABLE folders (
id INTEGER PRIMARY KEY,
id TEXT PRIMARY KEY,
title TEXT,
created_time INT,
updated_time INT,
remote_id TEXT
updated_time INT
);
CREATE TABLE notes (
id INTEGER PRIMARY KEY,
id TEXT PRIMARY KEY,
title TEXT,
body TEXT,
parent_id INT,
@ -16,7 +15,6 @@ CREATE TABLE notes (
latitude NUMERIC,
longitude NUMERIC,
altitude NUMERIC,
remote_id TEXT,
source TEXT,
author TEXT,
source_url TEXT,
@ -29,12 +27,20 @@ CREATE TABLE notes (
);
CREATE TABLE tags (
id INTEGER PRIMARY KEY,
title TEXT
id TEXT PRIMARY KEY,
title TEXT,
created_time INT,
updated_time INT
);
CREATE TABLE note_tags (
id INTEGER PRIMARY KEY,
note_id TEXT,
tag_id TEXT
);
CREATE TABLE resources (
id INTEGER PRIMARY KEY,
id TEXT PRIMARY KEY,
title TEXT,
mime TEXT,
filename TEXT,
@ -43,9 +49,9 @@ CREATE TABLE resources (
);
CREATE TABLE note_resources (
id INTEGER PRIMARY KEY,
note_id INT,
resource_id INT
id INTEGER PRIMARY KEY,
note_id TEXT,
resource_id TEXT
);
CREATE TABLE version (

View File

@ -1,13 +1,14 @@
#!/bin/bash
set -e
"/cygdrive/c/Qt/5.7/msvc2015/bin/qmake.exe" "D:\\Web\\www\\joplin\\QtClient\\evernote-import\\evernote-import-qt.pro" -spec win32-msvc2015 "CONFIG+=debug" "CONFIG+=qml_debug"
cd /cygdrive/d/Web/www/joplin/QtClient/build-evernote-import-qt-Visual_C_32_bites-Debug
rm -rf debug/ release/ Makefile*
export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin":$PATH
export PATH=$PATH:"/cygdrive/c/Program Files (x86)/Windows Kits/8.1/bin/x86"
export PATH=$PATH:"/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include"
"/cygdrive/c/Qt/5.7/msvc2015/bin/qmake.exe" D:\\Web\\www\\joplin\\QtClient\\evernote-import\\evernote-import-qt.pro -spec win32-msvc2015 "CONFIG+=debug" "CONFIG+=qml_debug"
"/cygdrive/c/Qt/Tools/QtCreator/bin/jom.exe" qmake_all
"/cygdrive/c/Qt/Tools/QtCreator/bin/jom.exe"
# "/opt/Qt/5.7/gcc_64/bin/qmake" /home/laurent/src/notes/evernote-import-qt/evernote-import-qt.pro -spec linux-g++ CONFIG+=debug CONFIG+=qml_debug
# "/usr/bin/make" qmake_all
# make
# echo "============================================="
# ./evernote-import-qt
rsync -a /cygdrive/d/Web/www/joplin/QtClient/dependencies/dll-debug/ /cygdrive/d/Web/www/joplin/QtClient/build-evernote-import-qt-Visual_C_32_bites-Debug/debug
cd -

View File

@ -25,3 +25,10 @@ DEFINES += QT_DEPRECATED_WARNINGS
HEADERS += \
xmltomd.h
INCLUDEPATH += "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include"
INCLUDEPATH += "C:/Program Files (x86)/Windows Kits/10/Include/10.0.10240.0/ucrt"
LIBS += -L"C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/lib"
LIBS += -L"C:/Program Files (x86)/Windows Kits/8.1/Lib/winv6.3/um/x86"
LIBS += -L"C:/Program Files (x86)/Windows Kits/10/Lib/10.0.10240.0/ucrt/x86"

View File

@ -8,21 +8,22 @@
#include <QDir>
#include <QSqlError>
#include <QSqlRecord>
#include <QCryptographicHash>
#include <QTextCodec>
#include "xmltomd.h"
struct Resource {
QString id;
QString mime;
QString filename;
QByteArray data;
time_t timestamp;
struct EnMediaElement {
QString hash;
QString alt;
};
Resource() : timestamp(0) {}
struct ContentElements {
QList<EnMediaElement> enMediaElements;
};
struct Note {
int id;
QString id;
QString title;
QString content;
time_t created;
@ -39,11 +40,17 @@ struct Note {
QString reminderTime;
QString sourceApplication;
QString applicationData;
std::vector<Resource> resources;
QList<EnMediaElement> enMediaElements;
std::vector<xmltomd::Resource> resources;
Note() : created(0), updated(0) {}
};
// Derives a 32-character hexadecimal identifier from `s`.
//
// The string is UTF-8 encoded and hashed with SHA-256; the first 32 hex
// characters of the digest are returned. The result is fully determined by
// `s`, so callers that need distinct IDs must mix varying data into `s`.
QString createUuid(const QString& s) {
	const QByteArray digest = QCryptographicHash::hash(s.toUtf8(), QCryptographicHash::Sha256);
	const QString hexDigest(digest.toHex());
	return hexDigest.left(32);
}
time_t dateStringToTimestamp(const QString& s) {
QDateTime d = QDateTime::fromString(s, "yyyyMMddThhmmssZ");
d.setTimeSpec(Qt::UTC);
@ -100,7 +107,7 @@ void parseAttributes(QXmlStreamReader& reader, Note& note) {
// </resource-attributes>
// </resource>
void parseResourceAttributes(QXmlStreamReader& reader, Resource& resource) {
void parseResourceAttributes(QXmlStreamReader& reader, xmltomd::Resource& resource) {
while (reader.readNextStartElement()) {
if (reader.name() == "file-name") {
resource.filename = reader.readElementText();
@ -116,7 +123,7 @@ void parseResourceAttributes(QXmlStreamReader& reader, Resource& resource) {
}
}
void parseResourceRecognition(QXmlStreamReader& reader, Resource& resource) {
void parseResourceRecognition(QXmlStreamReader& reader, xmltomd::Resource& resource) {
QString recognitionXml = reader.readElementText();
QXmlStreamReader r(recognitionXml.toUtf8());
@ -141,8 +148,8 @@ void parseResourceRecognition(QXmlStreamReader& reader, Resource& resource) {
}
}
Resource parseResource(QXmlStreamReader& reader) {
Resource output;
xmltomd::Resource parseResource(QXmlStreamReader& reader) {
xmltomd::Resource output;
while (reader.readNextStartElement()) {
if (reader.name() == "data") {
QString encoding = "";
@ -154,7 +161,7 @@ Resource parseResource(QXmlStreamReader& reader) {
}
if (encoding != "base64") {
qWarning() << "Unsupported <resource><data> encoding:" << encoding;
return Resource();
return xmltomd::Resource();
}
output.data = QByteArray::fromBase64(reader.readElementText().toUtf8());
@ -173,7 +180,37 @@ Resource parseResource(QXmlStreamReader& reader) {
}
}
//qDebug() << output.id << output.mime << output.filename << output.timestamp;
if (!output.id.length()) {
//output.id = createUuid(QString("%1%2%3%4").arg(output.filename).arg(output.timestamp).arg(QDateTime::currentMSecsSinceEpoch()).arg((int)qrand()));
}
return output;
}
// Scans the XML body of a note and collects, in document order, the "hash"
// and "alt" attributes of every <en-media> start element found below the
// root element.
//
// Returns an empty ContentElements (after logging a warning) when no root
// element can be read. If the reader stops on an XML error part-way through,
// the elements collected so far are returned and the error is logged, so a
// truncated list is not silently mistaken for a complete one.
ContentElements parseContentElements(const QString& content) {
	ContentElements output;
	QXmlStreamReader reader(content.toUtf8());

	if (!reader.readNextStartElement()) {
		qWarning() << "Cannot parse XML:" << content;
		return output;
	}

	while (!reader.atEnd()) {
		reader.readNext();
		if (!(reader.isStartElement() && reader.name() == "en-media")) continue;

		EnMediaElement e;
		// Iterate over a const copy so the range-for does not detach the container.
		const QXmlStreamAttributes mediaAttributes = reader.attributes();
		for (const QXmlStreamAttribute& attr : mediaAttributes) {
			if (attr.name() == "hash") {
				e.hash = attr.value().toString();
			} else if (attr.name() == "alt") {
				e.alt = attr.value().toString();
			}
		}
		output.enMediaElements << e;
	}

	// atEnd() also becomes true when the reader hits an error, so make the
	// difference between "end of document" and "gave up early" visible.
	if (reader.hasError()) {
		qWarning() << "XML error while looking for <en-media> tags:" << reader.errorString();
	}

	return output;
}
@ -186,6 +223,8 @@ Note parseNote(QXmlStreamReader& reader) {
note.title = reader.readElementText();
} else if (reader.name() == "content") {
note.content = reader.readElementText();
ContentElements contentElements = parseContentElements(note.content);
note.enMediaElements = contentElements.enMediaElements;
} else if (reader.name() == "created") {
note.created = dateStringToTimestamp(reader.readElementText());
} else if (reader.name() == "updated") {
@ -202,11 +241,59 @@ Note parseNote(QXmlStreamReader& reader) {
}
}
//qDebug() << title << created << updated;
note.id = createUuid(QString("%1%2%3%4%5")
.arg(note.title)
.arg(note.content)
.arg(note.created)
.arg(QDateTime::currentMSecsSinceEpoch())
.arg((qint64)qrand()));
//qDebug() << note.longitude << note.latitude << note.source << note.author << note.sourceUrl;
// This is a bit of a hack. Notes sometimes have resources attached to them, but those <resource> tags don't contain
// an "objID" tag, making it impossible to reference the resource. However, in this case the content of the note
// will contain a corresponding <en-media/> tag, which has the ID in the "hash" attribute. All this information
// has been collected above so we now set the resource ID to the hash attribute of the en-media tags. Here's an
// example of a note that shows this problem:
//qDebug() << note.sourceApplication << note.reminderOrder << note.reminderDoneTime;
// <?xml version="1.0" encoding="UTF-8"?>
// <!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
// <en-export export-date="20161221T203133Z" application="Evernote/Windows" version="6.x">
// <note>
// <title>Commande Asda</title>
// <content>
// <![CDATA[
// <?xml version="1.0" encoding="UTF-8"?>
// <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
// <en-note>
// <en-media alt="your QR code" hash="216a16a1bbe007fba4ccf60b118b4ccc" type="image/png"></en-media></en-note>]]>
// </content>
// <created>20160921T203424Z</created>
// <updated>20160921T203438Z</updated>
// <note-attributes>
// <reminder-order>20160902T140445Z</reminder-order>
// <reminder-done-time>20160924T101120Z</reminder-done-time>
// </note-attributes>
// <resource>
// <data encoding="base64">........</data>
// <mime>image/png</mime>
// <width>150</width>
// <height>150</height>
// </resource>
// </note>
// </en-export>
int mediaHashIndex = 0;
for (int i = 0; i < note.resources.size(); i++) {
xmltomd::Resource& r = note.resources[i];
if (r.id == "") {
if (note.enMediaElements.size() <= mediaHashIndex) {
qWarning() << "Resource without an ID and hash did not appear in note content:" << note.id;
} else {
r.id = note.enMediaElements[mediaHashIndex].hash;
r.alt = note.enMediaElements[mediaHashIndex].alt;
mediaHashIndex++;
}
}
}
return note;
}
@ -220,6 +307,9 @@ std::vector<Note> parseXmlFile(const QString& filePath) {
return output;
}
QTextStream in(&file);
in.setCodec("UTF-8");
QByteArray fileData = file.readAll();
QXmlStreamReader reader(fileData);
@ -239,11 +329,25 @@ std::vector<Note> parseXmlFile(const QString& filePath) {
return output;
}
void filePutContents(const QString& filePath, const QString& content) {
QFile file(filePath);
if (file.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
QTextStream stream(&file);
stream << content;
} else {
qCritical() << "Cannot write to" << filePath;
}
}
int main(int argc, char *argv[]) {
QCoreApplication a(argc, argv);
QString dbPath = "D:/Web/www/joplin/notes.sqlite";
QTextCodec::setCodecForLocale(QTextCodec::codecForName("UTF-8"));
qsrand(QTime::currentTime().msec());
QString dbPath = "D:/Web/www/joplin/QtClient/data/notes.sqlite";
QString resourceDir = "D:/Web/www/joplin/QtClient/data/resources";
QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE");
db.setDatabaseName(dbPath);
@ -258,19 +362,24 @@ int main(int argc, char *argv[]) {
// TODO: REMOVE REMOVE REMOVE
db.exec("DELETE FROM folders");
db.exec("DELETE FROM notes");
db.exec("DELETE FROM resources");
db.exec("DELETE FROM note_resources");
db.exec("DELETE FROM tags");
// TODO: REMOVE REMOVE REMOVE
QDir dir("S:/Docs/Textes/Calendrier/EvernoteBackup/Enex20161219");
dir.setFilter(QDir::Files | QDir::Hidden | QDir::NoSymLinks);
QFileInfoList fileList = dir.entryInfoList();
qDebug() << fileList.size();
QMap<QString, QList<Note>> tagNotes;
for (int i = 0; i < fileList.size(); ++i) {
QFileInfo fileInfo = fileList.at(i);
db.exec("BEGIN TRANSACTION");
QSqlQuery query(db);
query.prepare("INSERT INTO folders (title, created_time, updated_time) VALUES (?, ?, ?)");
query.prepare("INSERT INTO folders (id, title, created_time, updated_time) VALUES (?, ?, ?, ?)");
query.addBindValue(createUuid(QString("%1%2%3%4").arg(fileInfo.baseName()).arg(fileInfo.created().toTime_t()).arg((int)qrand()).arg(QDateTime::currentMSecsSinceEpoch())));
query.addBindValue(fileInfo.baseName());
query.addBindValue(fileInfo.created().toTime_t());
query.addBindValue(fileInfo.created().toTime_t());
@ -280,13 +389,48 @@ int main(int argc, char *argv[]) {
for (int noteIndex = 0; noteIndex < notes.size(); noteIndex++) {
Note n = notes[noteIndex];
for (int resourceIndex = 0; resourceIndex < n.resources.size(); resourceIndex++) {
xmltomd::Resource resource = n.resources[resourceIndex];
QSqlQuery query(db);
query.prepare("INSERT INTO resources (id, title, mime, filename, created_time, updated_time) VALUES (?,?,?,?,?,?)");
query.addBindValue(resource.id);
query.addBindValue(resource.filename);
query.addBindValue(resource.mime);
query.addBindValue(resource.filename);
query.addBindValue(resource.timestamp);
query.addBindValue(resource.timestamp);
query.exec();
query = QSqlQuery(db);
query.prepare("INSERT INTO note_resources (resource_id, note_id) VALUES (?,?)");
query.addBindValue(resource.id);
query.addBindValue(n.id);
query.exec();
}
}
for (int noteIndex = 0; noteIndex < notes.size(); noteIndex++) {
Note n = notes[noteIndex];
// if (i != 8 || noteIndex != 3090) continue;
time_t reminderOrder = dateStringToTimestamp(n.reminderOrder);
QString markdown = xmltomd::evernoteXmlToMd(n.content);
QString markdown = xmltomd::evernoteXmlToMd(n.content, n.resources);
QString html(n.content);
html.replace("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "");
html.replace("<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">", "");
html = html.trimmed();
html = "<style>* { margin: 0; padding:0; }</style><div style=\"width: 100%\"><div style=\"float: left; width: 45%; font-family:monospace;\">" + html + "</div><div style=\"float: left; width: 45%;\"><pre style=\"white-space: pre-wrap;\">" + markdown + "</pre></div></div>";
QString generatedPath = "D:/Web/www/joplin/tests/generated";
filePutContents(QString("%1/%2_%3.html").arg(generatedPath).arg(i).arg(noteIndex), html);
QSqlQuery query(db);
query.prepare("INSERT INTO notes (title, body, created_time, updated_time, longitude, latitude, altitude, source, author, source_url, is_todo, todo_due, todo_completed, source_application, application_data, `order`) VALUES (:title,:body,:created_time,:updated_time,:longitude,:latitude,:altitude,:source,:author,:source_url,:is_todo,:todo_due,:todo_completed,:source_application,:application_data,:order)");
query.prepare("INSERT INTO notes (id, title, body, created_time, updated_time, longitude, latitude, altitude, source, author, source_url, is_todo, todo_due, todo_completed, source_application, application_data, `order`) VALUES (:id, :title,:body,:created_time,:updated_time,:longitude,:latitude,:altitude,:source,:author,:source_url,:is_todo,:todo_due,:todo_completed,:source_application,:application_data,:order)");
query.bindValue(":id", n.id);
query.bindValue(":title", n.title);
query.bindValue(":body", markdown);
query.bindValue(":created_time", n.created);
@ -305,6 +449,14 @@ int main(int argc, char *argv[]) {
query.bindValue(":order", reminderOrder);
query.exec();
for (int tagIndex = 0; tagIndex < n.tags.size(); tagIndex++) {
QString tag = n.tags[tagIndex];
if (!tagNotes.contains(tag)) {
tagNotes[tag] = QList<Note>();
}
tagNotes[tag] << n;
}
QSqlError error = query.lastError();
if (error.isValid()) {
qWarning() << "SQL error:" << error;
@ -315,4 +467,29 @@ int main(int argc, char *argv[]) {
db.exec("COMMIT");
}
db.exec("BEGIN TRANSACTION");
for (QMap<QString, QList<Note>>::const_iterator it = tagNotes.begin(); it != tagNotes.end(); ++it) {
QString tagId = createUuid(QString("%1%2%3").arg(it.key()).arg((int)qrand()).arg(QDateTime::currentMSecsSinceEpoch()));
QSqlQuery query(db);
query.prepare("INSERT INTO tags (id, title, created_time, updated_time) VALUES (?,?,?,?)");
query.addBindValue(tagId);
query.addBindValue(it.key());
query.addBindValue(QDateTime::currentDateTime().toTime_t());
query.addBindValue(QDateTime::currentDateTime().toTime_t());
query.exec();
for (int i = 0; i < it.value().size(); i++) {
Note note = it.value()[i];
QSqlQuery query(db);
query.prepare("INSERT INTO note_tags (note_id, tag_id) VALUES (?,?)");
query.addBindValue(note.id);
query.addBindValue(tagId);
query.exec();
}
}
db.exec("COMMIT");
}

View File

@ -1,10 +1,12 @@
#include <QDebug>
#include <QTextCodec>
#include "xmltomd.h"
namespace xmltomd {
QMap<QString, QString> htmlEntities;
QStringList imageMimeTypes;
QString htmlEntityDecode(const QString& htmlEntity) {
if (!htmlEntities.size()) {
@ -318,24 +320,53 @@ QMap<QString, QString> attributes(QXmlStreamReader& reader) {
return output;
}
// TODO: remove inner whitespaces (double/triple spaces, etc.)
QString collapseWhiteSpace(QString text) {
if (text.trimmed().length() == 0) return QString();
bool isWhiteSpace(const QChar& c) {
return c == '\n' || c == '\r' || c == '\v' || c == '\f' || c == '\t' || c == ' ';
}
// Remove all \n and \r from the left and right of the text
while (text.length() && (text[0] == '\n' || text[0] == '\r')) text = text.right(text.length() - 1);
while (text.length() && (text[text.length() - 1] == '\n' || text[text.length() - 1] == '\r')) text = text.left(text.length() - 1);
// Like QString::simplified(), except that it preserves non-breaking spaces (which
// Evernote uses for indentation, etc.)
QString simplifyString(const QString& s) {
QString output;
bool previousWhite = false;
for (int i = 0; i < s.length(); i++) {
QChar c = s[i];
bool isWhite = isWhiteSpace(c);
if (previousWhite && isWhite) {
// skip
} else {
output += c;
}
previousWhite = isWhite;
}
// Collapse all white spaces to just one. If there are spaces to the left and right of the string
// also collapse them to just one space.
bool spaceLeft = text.length() && text[0] == ' ';
bool spaceRight = text.length() && text[text.size() - 1] == ' ';
text = text.simplified();
while (output.length() && isWhiteSpace(output[0])) output = output.right(output.length() - 1);
while (output.length() && isWhiteSpace(output[output.length() - 1])) output = output.left(output.length() - 1);
if (spaceLeft) text = " " + text;
if (spaceRight) text = text + " ";
return output;
}
return text;
// Appends a run of character data to `lines`.
//
// Inside a code block (state.inCode) the text is appended verbatim, prefixed
// with a tab. Otherwise:
//   1. leading and trailing '\n' / '\r' characters are dropped;
//   2. whether the remaining text starts and/or ends with a plain space is
//      remembered;
//   3. the text is passed through simplifyString();
//   4. nothing is appended if the result is empty and there was no
//      leading or trailing space; otherwise a SPACE token is appended
//      before and/or after the text for each remembered edge space.
void collapseWhiteSpaceAndAppend(QStringList& lines, ParsingState& state, QString text) {
	if (state.inCode) {
		lines.append("\t" + text);
		return;
	}

	// Trim line breaks from both ends by narrowing an index window.
	int first = 0;
	int end = text.length();
	while (first < end && (text.at(first) == '\n' || text.at(first) == '\r')) first++;
	while (end > first && (text.at(end - 1) == '\n' || text.at(end - 1) == '\r')) end--;
	text = text.mid(first, end - first);

	const bool leadingSpace = text.length() && text.at(0) == ' ';
	const bool trailingSpace = text.length() && text.at(text.length() - 1) == ' ';

	const QString collapsed = simplifyString(text);
	if (collapsed == "" && !leadingSpace && !trailingSpace) return;

	if (leadingSpace) lines.append(SPACE);
	lines.append(collapsed);
	if (trailingSpace) lines.append(SPACE);
}
bool isNewLineBlock(const QString& s) {
@ -393,6 +424,23 @@ QString processMdArrayNewLines(QStringList md) {
md = temp;
// NEW!!!
temp.clear();
last = "";
foreach (QString v, md) {
if (last == NEWLINE && (v == NEWLINE_MERGED || v == BLOCK_OPEN)) {
// Skip it
} else {
temp.push_back(v);
}
last = v;
}
md = temp;
if (md.size() > 2) {
if (md[md.size() - 2] == NEWLINE_MERGED && md[md.size() - 1] == NEWLINE) {
md.pop_back();
@ -400,12 +448,24 @@ QString processMdArrayNewLines(QStringList md) {
}
QString output;
QString previous;
bool start = true;
foreach (QString v, md) {
QString add;
if (v == BLOCK_CLOSE || v == BLOCK_OPEN || v == NEWLINE || v == NEWLINE_MERGED) {
output += "\n";
add = "\n";
} else if (v == SPACE) {
if (previous == SPACE || previous == "\n" || start) {
continue; // skip
} else {
add = " ";
}
} else {
output += v;
add = v;
}
start = false;
output += add;
previous = add;
}
if (!output.trimmed().length()) return QString();
@ -413,8 +473,31 @@ QString processMdArrayNewLines(QStringList md) {
return output;
}
// Returns true if `m` is one of the known image MIME types (compared
// case-insensitively). addResourceTag() uses this to choose between the
// markdown image syntax and a plain link.
//
// The list is filled on first use. On top of the original entries it includes
// image/bmp, image/svg+xml, image/webp and image/x-icon, which were missing
// and therefore made such resources render as links rather than images.
bool isImageMimeType(const QString& m) {
	if (!imageMimeTypes.size()) {
		imageMimeTypes
			<< "image/cgm" << "image/fits" << "image/g3fax" << "image/gif" << "image/ief"
			<< "image/jp2" << "image/jpeg" << "image/jpm" << "image/jpx" << "image/naplps"
			<< "image/png" << "image/prs.btif" << "image/prs.pti" << "image/t38" << "image/tiff"
			<< "image/tiff-fx" << "image/vnd.adobe.photoshop" << "image/vnd.cns.inf2"
			<< "image/vnd.djvu" << "image/vnd.dwg" << "image/vnd.dxf" << "image/vnd.fastbidsheet"
			<< "image/vnd.fpx" << "image/vnd.fst" << "image/vnd.fujixerox.edmics-mmr"
			<< "image/vnd.fujixerox.edmics-rlc" << "image/vnd.globalgraphics.pgb"
			<< "image/vnd.microsoft.icon" << "image/vnd.mix" << "image/vnd.ms-modi"
			<< "image/vnd.net-fpx" << "image/vnd.sealed.png" << "image/vnd.sealedmedia.softseal.gif"
			<< "image/vnd.sealedmedia.softseal.jpg" << "image/vnd.svf" << "image/vnd.wap.wbmp"
			<< "image/vnd.xiff"
			// Common image types absent from the original list.
			<< "image/bmp" << "image/svg+xml" << "image/webp" << "image/x-icon";
	}
	return imageMimeTypes.contains(m, Qt::CaseInsensitive);
}
// Appends the markdown reference for `resource` to `lines` as three separate
// entries: the opening bracket, the label, and the "](:/<resource id>)" tail.
//
// Image resources (see isImageMimeType) use the image form "![...]"; every
// other resource uses the link form "[...]". The label is `alt` when it is
// non-empty, otherwise the resource's own alt text.
void addResourceTag(QStringList& lines, Resource& resource, const QString& alt = "") {
	const QString label = (alt == "") ? resource.alt : alt;
	const QString opening = isImageMimeType(resource.mime) ? "![" : "[";

	lines.append(opening);
	lines.append(label);
	lines.append(QString("](:/%1)").arg(resource.id));
}
void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingState& state) {
// Attributes are rarely used in Evernote XML code, so they are only loaded as needed
// by the tag using `attrs = attributes(reader);`
QMap<QString, QString> attrs;
std::vector<QMap<QString, QString>> attributesLIFO;
while (!reader.atEnd()) {
reader.readNext();
@ -422,18 +505,19 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
QStringRef n = reader.name();
if (reader.isStartElement()) {
attrs.clear();
attributesLIFO.push_back(attributes(reader));
if (isBlockTag(n)) {
lines.append(BLOCK_OPEN);
evernoteXmlToMdArray(reader, lines, state);
} else if (isStrongTag(n)) {
lines.append("**");
} else if (isAnchor(n)) {
attrs = attributes(reader);
lines.append("[");
} else if (isEmTag(n)) {
lines.append("*");
} else if (n == "en-todo") {
attrs = attributesLIFO.back();
QString checked = attrs["checked"] == "true" ? "X" : " ";
lines.append(QString("- [%1] ").arg(checked));
} else if (isListTag(n)) {
@ -470,7 +554,26 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
} else if (n == "br") {
lines.append(NEWLINE);
} else if (n == "en-media") {
// TODO
attrs = attributesLIFO.back();
QString hash = attrs["hash"];
Resource resource;
for (int i = 0; i < state.resources.size(); i++) {
Resource r = state.resources[i];
if (r.id == hash) {
resource = r;
state.resources.erase(state.resources.begin() + i);
break;
}
}
// select * from notes where body like "%](:/%";
// If the resource does not appear among the note's resources, it
// means it's an attachment. It will be appended along with the
// other remaining resources at the bottom of the markdown text.
if (resource.id != "") {
addResourceTag(lines, resource, attrs["alt"]);
}
} else if (n == "span" || n == "font") {
// Ignore
} else {
@ -487,23 +590,22 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
state.inCode = false;
lines.append(BLOCK_CLOSE);
} else if (isAnchor(n)) {
attrs = attributesLIFO.back();
QString href = attrs.contains("href") ? attrs["href"] : "";
lines.append(QString("](%1)").arg(href));
} else if (isListTag(n)) {
lines.append(BLOCK_CLOSE);
state.lists.pop_back();
} else if (n == "en-media") {
// TODO
lines.append("[EN-MEDIA TODO]");
// Skip
} else if (isIgnoredEndTag(n)) {
// Skip
} else {
qWarning() << "Unsupported end tag:" << n;
}
if (attributesLIFO.size()) attributesLIFO.pop_back();
} else if (reader.isCharacters()) {
QString text = state.inCode ? reader.text().toString() : collapseWhiteSpace(reader.text().toString());
if (state.inCode) text = "\t" + text;
if (text != "") lines.append(text);
collapseWhiteSpaceAndAppend(lines, state, reader.text().toString());
} else if (reader.isEndDocument()) {
// Ignore
} else if (reader.isEntityReference()) {
@ -514,14 +616,24 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
}
}
QString evernoteXmlToMd(const QString& content) {
QString evernoteXmlToMd(const QString& content, std::vector<Resource> resources) {
QXmlStreamReader reader(content.toUtf8());
if (reader.readNextStartElement()) {
QStringList mdLines;
ParsingState parsingState;
parsingState.inCode = false;
parsingState.resources = resources;
evernoteXmlToMdArray(reader, mdLines, parsingState);
bool firstAttachment = true;
foreach (Resource r, parsingState.resources) {
if (firstAttachment) mdLines.push_back(NEWLINE);
mdLines.push_back(NEWLINE);
addResourceTag(mdLines, r, r.filename);
firstAttachment = false;
}
return processMdArrayNewLines(mdLines);
} else {
qWarning() << "Cannot parse XML:" << content;

View File

@ -7,17 +7,31 @@
namespace xmltomd {
// A file attached to a note, as read from a <resource> element of the
// Evernote export.
struct Resource {
	// Identifier used to reference the resource from the note body
	// (emitted in the markdown as ":/<id>").
	QString id;
	// MIME type; decides whether the resource is rendered as an image or a link.
	QString mime;
	// File name of the resource (taken from the <file-name> tag).
	QString filename;
	// Alternative text used as the markdown label when none is given.
	QString alt;
	// Decoded binary content of the resource.
	QByteArray data;
	// Stored as the resource's created and updated time; 0 until set.
	time_t timestamp = 0;
};
const QString BLOCK_OPEN = "<div>";
const QString BLOCK_CLOSE = "</div>";
const QString NEWLINE = "<br/>";
const QString NEWLINE_MERGED = "<merged/>";
const QString SPACE = "<space/>";
// Mutable state threaded through the recursive XML-to-markdown conversion.
struct ParsingState {
	// Stack of the lists currently open; an entry is popped when a list tag
	// closes. NOTE(review): the pair presumably holds the list tag name and
	// an item counter - confirm against the code that pushes entries.
	std::vector<std::pair<QString, int>> lists;
	// True while inside a code block, where text is kept verbatim and
	// prefixed with a tab. Defaults to false so that a freshly constructed
	// state is never read uninitialized.
	bool inCode = false;
	// Resources of the note not yet referenced by an <en-media> tag. Entries
	// are removed as they are matched; whatever remains is appended at the
	// bottom of the markdown.
	std::vector<Resource> resources;
	// NOTE(review): not referenced by any code visible here - confirm
	// whether it is still needed.
	std::vector<Resource> attachments;
};
QString evernoteXmlToMd(const QString &content);
QString evernoteXmlToMd(const QString &content, std::vector<Resource> resources);
}

View File

@ -7,5 +7,12 @@
"var"
]
}
]
],
"build_systems":
[
{
"name": "Build evernote-import",
"shell_cmd": "D:\\Programmes\\cygwin\\bin\\bash.exe --login D:\\Web\\www\\joplin\\QtClient\\evernote-import\\build.sh"
}
]
}