diff --git a/.gitignore b/.gitignore index df09d6955..07cde2866 100755 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,7 @@ QtClient/build-*-Debug/ *.pro.user notes*.sqlite Makefile -Makefile.* \ No newline at end of file +Makefile.* +TODO.md +tests/generated +QtClient/JoplinQtClient/make.bat \ No newline at end of file diff --git a/QtClient/JoplinQtClient/application.cpp b/QtClient/JoplinQtClient/application.cpp index c675644fe..26c875d7c 100755 --- a/QtClient/JoplinQtClient/application.cpp +++ b/QtClient/JoplinQtClient/application.cpp @@ -8,7 +8,7 @@ using namespace jop; Application::Application(int &argc, char **argv) : QGuiApplication(argc, argv) { - db_ = Database("D:/Web/www/joplin/notes.sqlite"); + db_ = Database("D:/Web/www/joplin/QtClient/data/notes.sqlite"); folderService_ = FolderService(db_); folderModel_.setService(folderService_); diff --git a/QtClient/JoplinQtClient/schema.sql b/QtClient/JoplinQtClient/schema.sql index e91e796f6..1557dafc7 100755 --- a/QtClient/JoplinQtClient/schema.sql +++ b/QtClient/JoplinQtClient/schema.sql @@ -1,13 +1,12 @@ CREATE TABLE folders ( - id INTEGER PRIMARY KEY, + id TEXT PRIMARY KEY, title TEXT, created_time INT, - updated_time INT, - remote_id TEXT + updated_time INT ); CREATE TABLE notes ( - id INTEGER PRIMARY KEY, + id TEXT PRIMARY KEY, title TEXT, body TEXT, parent_id INT, @@ -16,7 +15,6 @@ CREATE TABLE notes ( latitude NUMERIC, longitude NUMERIC, altitude NUMERIC, - remote_id TEXT, source TEXT, author TEXT, source_url TEXT, @@ -29,12 +27,20 @@ CREATE TABLE notes ( ); CREATE TABLE tags ( - id INTEGER PRIMARY KEY, - title TEXT + id TEXT PRIMARY KEY, + title TEXT, + created_time INT, + updated_time INT +); + +CREATE TABLE note_tags ( + id INTEGER PRIMARY KEY, + note_id TEXT, + tag_id TEXT ); CREATE TABLE resources ( - id INTEGER PRIMARY KEY, + id TEXT PRIMARY KEY, title TEXT, mime TEXT, filename TEXT, @@ -43,9 +49,9 @@ CREATE TABLE resources ( ); CREATE TABLE note_resources ( - id INTEGER PRIMARY KEY, - note_id INT, - resource_id INT + id INTEGER PRIMARY KEY, + note_id TEXT, + resource_id TEXT ); CREATE TABLE version ( diff --git a/QtClient/evernote-import/build.sh b/QtClient/evernote-import/build.sh index 4ebdae377..cff9e920f 100755 --- a/QtClient/evernote-import/build.sh +++ b/QtClient/evernote-import/build.sh @@ -1,13 +1,14 @@ #!/bin/bash + set -e -"/cygdrive/c/Qt/5.7/msvc2015/bin/qmake.exe" "D:\\Web\\www\\joplin\\QtClient\\evernote-import\\evernote-import-qt.pro" -spec win32-msvc2015 "CONFIG+=debug" "CONFIG+=qml_debug" +cd /cygdrive/d/Web/www/joplin/QtClient/build-evernote-import-qt-Visual_C_32_bites-Debug +rm -rf debug/ release/ Makefile* +export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin":$PATH +export PATH=$PATH:"/cygdrive/c/Program Files (x86)/Windows Kits/8.1/bin/x86" +export PATH=$PATH:"/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include" +"/cygdrive/c/Qt/5.7/msvc2015/bin/qmake.exe" D:\\Web\\www\\joplin\\QtClient\\evernote-import\\evernote-import-qt.pro -spec win32-msvc2015 "CONFIG+=debug" "CONFIG+=qml_debug" "/cygdrive/c/Qt/Tools/QtCreator/bin/jom.exe" qmake_all "/cygdrive/c/Qt/Tools/QtCreator/bin/jom.exe" - -# "/opt/Qt/5.7/gcc_64/bin/qmake" /home/laurent/src/notes/evernote-import-qt/evernote-import-qt.pro -spec linux-g++ CONFIG+=debug CONFIG+=qml_debug -# "/usr/bin/make" qmake_all -# make - -# echo "=============================================" -# ./evernote-import-qt \ No newline at end of file +rsync -a /cygdrive/d/Web/www/joplin/QtClient/dependencies/dll-debug/ /cygdrive/d/Web/www/joplin/QtClient/build-evernote-import-qt-Visual_C_32_bites-Debug/debug +cd - \ No newline at end of file diff --git a/QtClient/evernote-import/evernote-import-qt b/QtClient/evernote-import/evernote-import-qt deleted file mode 100755 index 390c8dffb..000000000 Binary files a/QtClient/evernote-import/evernote-import-qt and /dev/null differ diff --git a/QtClient/evernote-import/evernote-import-qt.pro b/QtClient/evernote-import/evernote-import-qt.pro index ba9150fdd..604b809bf 100755 --- a/QtClient/evernote-import/evernote-import-qt.pro +++ b/QtClient/evernote-import/evernote-import-qt.pro @@ -25,3 +25,10 @@ DEFINES += QT_DEPRECATED_WARNINGS HEADERS += \ xmltomd.h + +INCLUDEPATH += "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include" +INCLUDEPATH += "C:/Program Files (x86)/Windows Kits/10/Include/10.0.10240.0/ucrt" + +LIBS += -L"C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/lib" +LIBS += -L"C:/Program Files (x86)/Windows Kits/8.1/Lib/winv6.3/um/x86" +LIBS += -L"C:/Program Files (x86)/Windows Kits/10/Lib/10.0.10240.0/ucrt/x86" diff --git a/QtClient/evernote-import/main.cpp b/QtClient/evernote-import/main.cpp index a3fd6ceb6..807fb851a 100755 --- a/QtClient/evernote-import/main.cpp +++ b/QtClient/evernote-import/main.cpp @@ -8,21 +8,22 @@ #include #include #include +#include +#include #include "xmltomd.h" -struct Resource { - QString id; - QString mime; - QString filename; - QByteArray data; - time_t timestamp; +struct EnMediaElement { + QString hash; + QString alt; +}; - Resource() : timestamp(0) {} +struct ContentElements { + QList enMediaElements; }; struct Note { - int id; + QString id; QString title; QString content; time_t created; @@ -39,11 +40,17 @@ struct Note { QString reminderTime; QString sourceApplication; QString applicationData; - std::vector resources; + QList enMediaElements; + std::vector resources; Note() : created(0), updated(0) {} }; +QString createUuid(const QString& s) { + QString hash = QString(QCryptographicHash::hash(s.toUtf8(), QCryptographicHash::Sha256).toHex()); + return hash.left(32); +} + time_t dateStringToTimestamp(const QString& s) { QDateTime d = QDateTime::fromString(s, "yyyyMMddThhmmssZ"); d.setTimeSpec(Qt::UTC); @@ -100,7 +107,7 @@ void parseAttributes(QXmlStreamReader& reader, Note& note) { // // -void parseResourceAttributes(QXmlStreamReader& reader, Resource& resource) { +void parseResourceAttributes(QXmlStreamReader& reader, xmltomd::Resource& resource) { while (reader.readNextStartElement()) { if (reader.name() == "file-name") { resource.filename = reader.readElementText(); @@ -116,7 +123,7 @@ void parseResourceAttributes(QXmlStreamReader& reader, Resource& resource) { } } -void parseResourceRecognition(QXmlStreamReader& reader, Resource& resource) { +void parseResourceRecognition(QXmlStreamReader& reader, xmltomd::Resource& resource) { QString recognitionXml = reader.readElementText(); QXmlStreamReader r(recognitionXml.toUtf8()); @@ -141,8 +148,8 @@ void parseResourceRecognition(QXmlStreamReader& reader, Resource& resource) { } } -Resource parseResource(QXmlStreamReader& reader) { - Resource output; +xmltomd::Resource parseResource(QXmlStreamReader& reader) { + xmltomd::Resource output; while (reader.readNextStartElement()) { if (reader.name() == "data") { QString encoding = ""; @@ -154,7 +161,7 @@ Resource parseResource(QXmlStreamReader& reader) { } if (encoding != "base64") { qWarning() << "Unsupported encoding:" << encoding; - return Resource(); + return xmltomd::Resource(); } output.data = QByteArray::fromBase64(reader.readElementText().toUtf8()); @@ -173,7 +180,37 @@ Resource parseResource(QXmlStreamReader& reader) { } } - //qDebug() << output.id << output.mime << output.filename << output.timestamp; + if (!output.id.length()) { + //output.id = createUuid(QString("%1%2%3%4").arg(output.filename).arg(output.timestamp).arg(QDateTime::currentMSecsSinceEpoch()).arg((int)qrand())); + } + + return output; +} + +ContentElements parseContentElements(const QString& content) { + ContentElements output; + QXmlStreamReader reader(content.toUtf8()); + + if (reader.readNextStartElement()) { + while (!reader.atEnd()) { + reader.readNext(); + + QStringRef n = reader.name(); + + if (reader.isStartElement()) { + if (n == "en-media") { + EnMediaElement e; + foreach (const QXmlStreamAttribute &attr, reader.attributes()) { + if (attr.name().toString() == "hash") e.hash = attr.value().toString(); + if (attr.name().toString() == "alt") e.alt = attr.value().toString(); + } + output.enMediaElements << e; + } + } + } + } else { + qWarning() << "Cannot parse XML:" << content; + } return output; } @@ -186,6 +223,8 @@ Note parseNote(QXmlStreamReader& reader) { note.title = reader.readElementText(); } else if (reader.name() == "content") { note.content = reader.readElementText(); + ContentElements contentElements = parseContentElements(note.content); + note.enMediaElements = contentElements.enMediaElements; } else if (reader.name() == "created") { note.created = dateStringToTimestamp(reader.readElementText()); } else if (reader.name() == "updated") { @@ -202,11 +241,59 @@ Note parseNote(QXmlStreamReader& reader) { } } - //qDebug() << title << created << updated; + note.id = createUuid(QString("%1%2%3%4%5") + .arg(note.title) + .arg(note.content) + .arg(note.created) + .arg(QDateTime::currentMSecsSinceEpoch()) + .arg((qint64)qrand())); - //qDebug() << note.longitude << note.latitude << note.source << note.author << note.sourceUrl; + // This is a bit of a hack. Notes sometime have resources attached to it, but those tags don't contain + // an "objID" tag, making it impossible to reference the resource. However, in this case the content of the note + // will contain a corresponding tag, which has the ID in the "hash" attribute. All this information + // has been collected above so we now set the resource ID to the hash attribute of the en-media tags. Here's an + // example of note that shows this problem: - //qDebug() << note.sourceApplication << note.reminderOrder << note.reminderDoneTime; + // + // + // + // + // Commande Asda + // + // + // + // + // ]]> + // + // 20160921T203424Z + // 20160921T203438Z + // + // 20160902T140445Z + // 20160924T101120Z + // + // + // ........ + // image/png + // 150 + // 150 + // + // + // + + int mediaHashIndex = 0; + for (int i = 0; i < note.resources.size(); i++) { + xmltomd::Resource& r = note.resources[i]; + if (r.id == "") { + if (note.enMediaElements.size() <= mediaHashIndex) { + qWarning() << "Resource without an ID and hash did not appear in note content:" << note.id; + } else { + r.id = note.enMediaElements[mediaHashIndex].hash; + r.alt = note.enMediaElements[mediaHashIndex].alt; + mediaHashIndex++; + } + } + } return note; } @@ -220,6 +307,9 @@ std::vector parseXmlFile(const QString& filePath) { return output; } + QTextStream in(&file); + in.setCodec("UTF-8"); + QByteArray fileData = file.readAll(); QXmlStreamReader reader(fileData); @@ -239,11 +329,25 @@ std::vector parseXmlFile(const QString& filePath) { return output; } +void filePutContents(const QString& filePath, const QString& content) { + QFile file(filePath); + if (file.open(QIODevice::WriteOnly | QIODevice::Truncate)) { + QTextStream stream(&file); + stream << content; + } else { + qCritical() << "Cannot write to" << filePath; + } +} int main(int argc, char *argv[]) { QCoreApplication a(argc, argv); - QString dbPath = "D:/Web/www/joplin/notes.sqlite"; + QTextCodec::setCodecForLocale(QTextCodec::codecForName("UTF-8")); + + qsrand(QTime::currentTime().msec()); + + QString dbPath = "D:/Web/www/joplin/QtClient/data/notes.sqlite"; + QString resourceDir = "D:/Web/www/joplin/QtClient/data/resources"; QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE"); db.setDatabaseName(dbPath); @@ -258,19 +362,24 @@ int main(int argc, char *argv[]) { // TODO: REMOVE REMOVE REMOVE db.exec("DELETE FROM folders"); db.exec("DELETE FROM notes"); + db.exec("DELETE FROM resources"); + db.exec("DELETE FROM note_resources"); + db.exec("DELETE FROM tags"); // TODO: REMOVE REMOVE REMOVE QDir dir("S:/Docs/Textes/Calendrier/EvernoteBackup/Enex20161219"); dir.setFilter(QDir::Files | QDir::Hidden | QDir::NoSymLinks); QFileInfoList fileList = dir.entryInfoList(); - qDebug() << fileList.size(); + QMap> tagNotes; + for (int i = 0; i < fileList.size(); ++i) { QFileInfo fileInfo = fileList.at(i); db.exec("BEGIN TRANSACTION"); QSqlQuery query(db); - query.prepare("INSERT INTO folders (title, created_time, updated_time) VALUES (?, ?, ?)"); + query.prepare("INSERT INTO folders (id, title, created_time, updated_time) VALUES (?, ?, ?, ?)"); + query.addBindValue(createUuid(QString("%1%2%3%4").arg(fileInfo.baseName()).arg(fileInfo.created().toTime_t()).arg((int)qrand()).arg(QDateTime::currentMSecsSinceEpoch()))); query.addBindValue(fileInfo.baseName()); query.addBindValue(fileInfo.created().toTime_t()); query.addBindValue(fileInfo.created().toTime_t()); @@ -280,13 +389,48 @@ int main(int argc, char *argv[]) { for (int noteIndex = 0; noteIndex < notes.size(); noteIndex++) { Note n = notes[noteIndex]; + for (int resourceIndex = 0; resourceIndex < n.resources.size(); resourceIndex++) { + xmltomd::Resource resource = n.resources[resourceIndex]; + QSqlQuery query(db); + query.prepare("INSERT INTO resources (id, title, mime, filename, created_time, updated_time) VALUES (?,?,?,?,?,?)"); + query.addBindValue(resource.id); + query.addBindValue(resource.filename); + query.addBindValue(resource.mime); + query.addBindValue(resource.filename); + query.addBindValue(resource.timestamp); + query.addBindValue(resource.timestamp); + query.exec(); + + query = QSqlQuery(db); + query.prepare("INSERT INTO note_resources (resource_id, note_id) VALUES (?,?)"); + query.addBindValue(resource.id); + query.addBindValue(n.id); + query.exec(); + } + } + + for (int noteIndex = 0; noteIndex < notes.size(); noteIndex++) { + Note n = notes[noteIndex]; + + // if (i != 8 || noteIndex != 3090) continue; time_t reminderOrder = dateStringToTimestamp(n.reminderOrder); - QString markdown = xmltomd::evernoteXmlToMd(n.content); + QString markdown = xmltomd::evernoteXmlToMd(n.content, n.resources); + + QString html(n.content); + html.replace("", ""); + html.replace("", ""); + html = html.trimmed(); + + html = "
" + html + "
" + markdown + "
"; + + QString generatedPath = "D:/Web/www/joplin/tests/generated"; + filePutContents(QString("%1/%2_%3.html").arg(generatedPath).arg(i).arg(noteIndex), html); QSqlQuery query(db); - query.prepare("INSERT INTO notes (title, body, created_time, updated_time, longitude, latitude, altitude, source, author, source_url, is_todo, todo_due, todo_completed, source_application, application_data, `order`) VALUES (:title,:body,:created_time,:updated_time,:longitude,:latitude,:altitude,:source,:author,:source_url,:is_todo,:todo_due,:todo_completed,:source_application,:application_data,:order)"); + query.prepare("INSERT INTO notes (id, title, body, created_time, updated_time, longitude, latitude, altitude, source, author, source_url, is_todo, todo_due, todo_completed, source_application, application_data, `order`) VALUES (:id, :title,:body,:created_time,:updated_time,:longitude,:latitude,:altitude,:source,:author,:source_url,:is_todo,:todo_due,:todo_completed,:source_application,:application_data,:order)"); + query.bindValue(":id", n.id); query.bindValue(":title", n.title); query.bindValue(":body", markdown); query.bindValue(":created_time", n.created); @@ -305,6 +449,14 @@ int main(int argc, char *argv[]) { query.bindValue(":order", reminderOrder); query.exec(); + for (int tagIndex = 0; tagIndex < n.tags.size(); tagIndex++) { + QString tag = n.tags[tagIndex]; + if (!tagNotes.contains(tag)) { + tagNotes[tag] = QList(); + } + tagNotes[tag] << n; + } + QSqlError error = query.lastError(); if (error.isValid()) { qWarning() << "SQL error:" << error; @@ -315,4 +467,29 @@ int main(int argc, char *argv[]) { db.exec("COMMIT"); } + + db.exec("BEGIN TRANSACTION"); + + for (QMap>::const_iterator it = tagNotes.begin(); it != tagNotes.end(); ++it) { + QString tagId = createUuid(QString("%1%2%3").arg(it.key()).arg((int)qrand()).arg(QDateTime::currentMSecsSinceEpoch())); + + QSqlQuery query(db); + query.prepare("INSERT INTO tags (id, title, created_time, updated_time) VALUES (?,?,?,?)"); + query.addBindValue(tagId); + query.addBindValue(it.key()); + query.addBindValue(QDateTime::currentDateTime().toTime_t()); + query.addBindValue(QDateTime::currentDateTime().toTime_t()); + query.exec(); + + for (int i = 0; i < it.value().size(); i++) { + Note note = it.value()[i]; + QSqlQuery query(db); + query.prepare("INSERT INTO note_tags (note_id, tag_id) VALUES (?,?)"); + query.addBindValue(note.id); + query.addBindValue(tagId); + query.exec(); + } + } + + db.exec("COMMIT"); } diff --git a/QtClient/evernote-import/xmltomd.cpp b/QtClient/evernote-import/xmltomd.cpp index 48f962137..8a8820a87 100755 --- a/QtClient/evernote-import/xmltomd.cpp +++ b/QtClient/evernote-import/xmltomd.cpp @@ -1,10 +1,12 @@ #include +#include #include "xmltomd.h" namespace xmltomd { QMap htmlEntities; +QStringList imageMimeTypes; QString htmlEntityDecode(const QString& htmlEntity) { if (!htmlEntities.size()) { @@ -318,24 +320,53 @@ QMap attributes(QXmlStreamReader& reader) { return output; } -// TODO: remove inner whitespaces (double/triple spaces, etc.) -QString collapseWhiteSpace(QString text) { - if (text.trimmed().length() == 0) return QString(); +bool isWhiteSpace(const QChar& c) { + return c == '\n' || c == '\r' || c == '\v' || c == '\f' || c == '\t' || c == ' '; +} - // Remove all \n and \r from the left and right of the text - while (text.length() && (text[0] == '\n' || text[0] == '\r')) text = text.right(text.length() - 1); - while (text.length() && (text[text.length() - 1] == '\n' || text[text.length() - 1] == '\r')) text = text.left(text.length() - 1); +// Like QString::simpified(), except that it preserves non-breaking spaces (which +// Evernote uses for identation, etc.) +QString simplifyString(const QString& s) { + QString output; + bool previousWhite = false; + for (int i = 0; i < s.length(); i++) { + QChar c = s[i]; + bool isWhite = isWhiteSpace(c); + if (previousWhite && isWhite) { + // skip + } else { + output += c; + } + previousWhite = isWhite; + } - // Collapse all white spaces to just one. If there are spaces to the left and right of the string - // also collapse them to just one space. - bool spaceLeft = text.length() && text[0] == ' '; - bool spaceRight = text.length() && text[text.size() - 1] == ' '; - text = text.simplified(); + while (output.length() && isWhiteSpace(output[0])) output = output.right(output.length() - 1); + while (output.length() && isWhiteSpace(output[output.length() - 1])) output = output.left(output.length() - 1); - if (spaceLeft) text = " " + text; - if (spaceRight) text = text + " "; + return output; +} - return text; +void collapseWhiteSpaceAndAppend(QStringList& lines, ParsingState& state, QString text) { + if (state.inCode) { + text = "\t" + text; + lines.append(text); + } else { + // Remove all \n and \r from the left and right of the text + while (text.length() && (text[0] == '\n' || text[0] == '\r')) text = text.right(text.length() - 1); + while (text.length() && (text[text.length() - 1] == '\n' || text[text.length() - 1] == '\r')) text = text.left(text.length() - 1); + + // Collapse all white spaces to just one. If there are spaces to the left and right of the string + // also collapse them to just one space. + bool spaceLeft = text.length() && text[0] == ' '; + bool spaceRight = text.length() && text[text.size() - 1] == ' '; + text = simplifyString(text); + + if (!spaceLeft && !spaceRight && text == "") return; + + if (spaceLeft) lines.append(SPACE); + lines.append(text); + if (spaceRight) lines.append(SPACE); + } } bool isNewLineBlock(const QString& s) { @@ -393,6 +424,23 @@ QString processMdArrayNewLines(QStringList md) { md = temp; + + // NEW!!! + temp.clear(); + last = ""; + foreach (QString v, md) { + if (last == NEWLINE && (v == NEWLINE_MERGED || v == BLOCK_OPEN)) { + // Skip it + } else { + temp.push_back(v); + } + last = v; + } + md = temp; + + + + if (md.size() > 2) { if (md[md.size() - 2] == NEWLINE_MERGED && md[md.size() - 1] == NEWLINE) { md.pop_back(); @@ -400,12 +448,24 @@ QString processMdArrayNewLines(QStringList md) { } QString output; + QString previous; + bool start = true; foreach (QString v, md) { + QString add; if (v == BLOCK_CLOSE || v == BLOCK_OPEN || v == NEWLINE || v == NEWLINE_MERGED) { - output += "\n"; + add = "\n"; + } else if (v == SPACE) { + if (previous == SPACE || previous == "\n" || start) { + continue; // skip + } else { + add = " "; + } } else { - output += v; + add = v; } + start = false; + output += add; + previous = add; } if (!output.trimmed().length()) return QString(); @@ -413,8 +473,31 @@ QString processMdArrayNewLines(QStringList md) { return output; } +bool isImageMimeType(const QString& m) { + if (!imageMimeTypes.size()) { + imageMimeTypes << "image/cgm" << "image/fits" << "image/g3fax" << "image/gif" << "image/ief" << "image/jp2" << "image/jpeg" << "image/jpm" << "image/jpx" << "image/naplps" << "image/png" << "image/prs.btif" << "image/prs.pti" << "image/t38" << "image/tiff" << "image/tiff-fx" << "image/vnd.adobe.photoshop" << "image/vnd.cns.inf2" << "image/vnd.djvu" << "image/vnd.dwg" << "image/vnd.dxf" << "image/vnd.fastbidsheet" << "image/vnd.fpx" << "image/vnd.fst" << "image/vnd.fujixerox.edmics-mmr" << "image/vnd.fujixerox.edmics-rlc" << "image/vnd.globalgraphics.pgb" << "image/vnd.microsoft.icon" << "image/vnd.mix" << "image/vnd.ms-modi" << "image/vnd.net-fpx" << "image/vnd.sealed.png" << "image/vnd.sealedmedia.softseal.gif" << "image/vnd.sealedmedia.softseal.jpg" << "image/vnd.svf" << "image/vnd.wap.wbmp" << "image/vnd.xiff"; + } + return imageMimeTypes.contains(m, Qt::CaseInsensitive); +} + +void addResourceTag(QStringList& lines, Resource& resource, const QString& alt = "") { + QString tagAlt = alt == "" ? resource.alt : alt; + if (isImageMimeType(resource.mime)) { + lines.append("!["); + lines.append(tagAlt); + lines.append(QString("](:/%1)").arg(resource.id)); + } else { + lines.append("["); + lines.append(tagAlt); + lines.append(QString("](:/%1)").arg(resource.id)); + } +} + void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingState& state) { + // Attributes are rarely used in Evernote XML code, so they are only loaded as needed + // by the tag using `attrs = attributes(reader);` QMap attrs; + std::vector> attributesLIFO; while (!reader.atEnd()) { reader.readNext(); @@ -422,18 +505,19 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS QStringRef n = reader.name(); if (reader.isStartElement()) { - attrs.clear(); + attributesLIFO.push_back(attributes(reader)); + if (isBlockTag(n)) { lines.append(BLOCK_OPEN); evernoteXmlToMdArray(reader, lines, state); } else if (isStrongTag(n)) { lines.append("**"); } else if (isAnchor(n)) { - attrs = attributes(reader); lines.append("["); } else if (isEmTag(n)) { lines.append("*"); } else if (n == "en-todo") { + attrs = attributesLIFO.back(); QString checked = attrs["checked"] == "true" ? "X" : " "; lines.append(QString("- [%1] ").arg(checked)); } else if (isListTag(n)) { @@ -470,7 +554,26 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS } else if (n == "br") { lines.append(NEWLINE); } else if (n == "en-media") { - // TODO + attrs = attributesLIFO.back(); + QString hash = attrs["hash"]; + Resource resource; + for (int i = 0; i < state.resources.size(); i++) { + Resource r = state.resources[i]; + if (r.id == hash) { + resource = r; + state.resources.erase(state.resources.begin() + i); + break; + } + } + + // select * from notes where body like "%](:/%"; + + // If the resource does not appear among the note's resources, it + // means it's an attachement. It will be appended along with the + // other remaining resources at the bottom of the markdown text. + if (resource.id != "") { + addResourceTag(lines, resource, attrs["alt"]); + } } else if (n == "span" || n == "font") { // Ignore } else { @@ -487,23 +590,22 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS state.inCode = false; lines.append(BLOCK_CLOSE); } else if (isAnchor(n)) { + attrs = attributesLIFO.back(); QString href = attrs.contains("href") ? attrs["href"] : ""; lines.append(QString("](%1)").arg(href)); } else if (isListTag(n)) { lines.append(BLOCK_CLOSE); state.lists.pop_back(); } else if (n == "en-media") { - // TODO - lines.append("[EN-MEDIA TODO]"); + // Skip } else if (isIgnoredEndTag(n)) { // Skip } else { qWarning() << "Unsupported end tag:" << n; } + if (attributesLIFO.size()) attributesLIFO.pop_back(); } else if (reader.isCharacters()) { - QString text = state.inCode ? reader.text().toString() : collapseWhiteSpace(reader.text().toString()); - if (state.inCode) text = "\t" + text; - if (text != "") lines.append(text); + collapseWhiteSpaceAndAppend(lines, state, reader.text().toString()); } else if (reader.isEndDocument()) { // Ignore } else if (reader.isEntityReference()) { @@ -514,14 +616,24 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS } } -QString evernoteXmlToMd(const QString& content) { +QString evernoteXmlToMd(const QString& content, std::vector resources) { QXmlStreamReader reader(content.toUtf8()); if (reader.readNextStartElement()) { QStringList mdLines; ParsingState parsingState; parsingState.inCode = false; + parsingState.resources = resources; evernoteXmlToMdArray(reader, mdLines, parsingState); + + bool firstAttachment = true; + foreach (Resource r, parsingState.resources) { + if (firstAttachment) mdLines.push_back(NEWLINE); + mdLines.push_back(NEWLINE); + addResourceTag(mdLines, r, r.filename); + firstAttachment = false; + } + return processMdArrayNewLines(mdLines); } else { qWarning() << "Cannot parse XML:" << content; diff --git a/QtClient/evernote-import/xmltomd.h b/QtClient/evernote-import/xmltomd.h index 923b6b827..d4c4a5c1d 100755 --- a/QtClient/evernote-import/xmltomd.h +++ b/QtClient/evernote-import/xmltomd.h @@ -7,17 +7,31 @@ namespace xmltomd { + struct Resource { + QString id; + QString mime; + QString filename; + QString alt; + QByteArray data; + time_t timestamp; + + Resource() : timestamp(0) {} + }; + const QString BLOCK_OPEN = "
"; const QString BLOCK_CLOSE = "
"; const QString NEWLINE = "
"; const QString NEWLINE_MERGED = ""; + const QString SPACE = ""; struct ParsingState { std::vector> lists; bool inCode; + std::vector resources; + std::vector attachments; }; - QString evernoteXmlToMd(const QString &content); + QString evernoteXmlToMd(const QString &content, std::vector resources); } diff --git a/joplin.sublime-project b/joplin.sublime-project index 0ee13814f..2bc6fcdb5 100755 --- a/joplin.sublime-project +++ b/joplin.sublime-project @@ -7,5 +7,12 @@ "var" ] } - ] + ], + "build_systems": + [ + { + "name": "Build evernote-import", + "shell_cmd": "D:\\Programmes\\cygwin\\bin\\bash.exe --login D:\\Web\\www\\joplin\\QtClient\\evernote-import\\build.sh" + } + ] }