mirror of
https://github.com/laurent22/joplin.git
synced 2024-12-24 10:27:10 +02:00
Importing Evernote notes
This commit is contained in:
parent
9d1e62a872
commit
6c3c9d8a83
3
.gitignore
vendored
3
.gitignore
vendored
@ -21,3 +21,6 @@ QtClient/build-*-Debug/
|
||||
notes*.sqlite
|
||||
Makefile
|
||||
Makefile.*
|
||||
TODO.md
|
||||
tests/generated
|
||||
QtClient/JoplinQtClient/make.bat
|
@ -8,7 +8,7 @@
|
||||
using namespace jop;
|
||||
|
||||
Application::Application(int &argc, char **argv) : QGuiApplication(argc, argv) {
|
||||
db_ = Database("D:/Web/www/joplin/notes.sqlite");
|
||||
db_ = Database("D:/Web/www/joplin/QtClient/data/notes.sqlite");
|
||||
folderService_ = FolderService(db_);
|
||||
folderModel_.setService(folderService_);
|
||||
|
||||
|
@ -1,13 +1,12 @@
|
||||
CREATE TABLE folders (
|
||||
id INTEGER PRIMARY KEY,
|
||||
id TEXT PRIMARY KEY,
|
||||
title TEXT,
|
||||
created_time INT,
|
||||
updated_time INT,
|
||||
remote_id TEXT
|
||||
updated_time INT
|
||||
);
|
||||
|
||||
CREATE TABLE notes (
|
||||
id INTEGER PRIMARY KEY,
|
||||
id TEXT PRIMARY KEY,
|
||||
title TEXT,
|
||||
body TEXT,
|
||||
parent_id INT,
|
||||
@ -16,7 +15,6 @@ CREATE TABLE notes (
|
||||
latitude NUMERIC,
|
||||
longitude NUMERIC,
|
||||
altitude NUMERIC,
|
||||
remote_id TEXT,
|
||||
source TEXT,
|
||||
author TEXT,
|
||||
source_url TEXT,
|
||||
@ -29,12 +27,20 @@ CREATE TABLE notes (
|
||||
);
|
||||
|
||||
CREATE TABLE tags (
|
||||
id TEXT PRIMARY KEY,
|
||||
title TEXT,
|
||||
created_time INT,
|
||||
updated_time INT
|
||||
);
|
||||
|
||||
CREATE TABLE note_tags (
|
||||
id INTEGER PRIMARY KEY,
|
||||
title TEXT
|
||||
note_id TEXT,
|
||||
tag_id TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE resources (
|
||||
id INTEGER PRIMARY KEY,
|
||||
id TEXT PRIMARY KEY,
|
||||
title TEXT,
|
||||
mime TEXT,
|
||||
filename TEXT,
|
||||
@ -44,8 +50,8 @@ CREATE TABLE resources (
|
||||
|
||||
CREATE TABLE note_resources (
|
||||
id INTEGER PRIMARY KEY,
|
||||
note_id INT,
|
||||
resource_id INT
|
||||
note_id TEXT,
|
||||
resource_id TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE version (
|
||||
|
@ -1,13 +1,14 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
"/cygdrive/c/Qt/5.7/msvc2015/bin/qmake.exe" "D:\\Web\\www\\joplin\\QtClient\\evernote-import\\evernote-import-qt.pro" -spec win32-msvc2015 "CONFIG+=debug" "CONFIG+=qml_debug"
|
||||
cd /cygdrive/d/Web/www/joplin/QtClient/build-evernote-import-qt-Visual_C_32_bites-Debug
|
||||
rm -rf debug/ release/ Makefile*
|
||||
export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin":$PATH
|
||||
export PATH=$PATH:"/cygdrive/c/Program Files (x86)/Windows Kits/8.1/bin/x86"
|
||||
export PATH=$PATH:"/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include"
|
||||
"/cygdrive/c/Qt/5.7/msvc2015/bin/qmake.exe" D:\\Web\\www\\joplin\\QtClient\\evernote-import\\evernote-import-qt.pro -spec win32-msvc2015 "CONFIG+=debug" "CONFIG+=qml_debug"
|
||||
"/cygdrive/c/Qt/Tools/QtCreator/bin/jom.exe" qmake_all
|
||||
"/cygdrive/c/Qt/Tools/QtCreator/bin/jom.exe"
|
||||
|
||||
# "/opt/Qt/5.7/gcc_64/bin/qmake" /home/laurent/src/notes/evernote-import-qt/evernote-import-qt.pro -spec linux-g++ CONFIG+=debug CONFIG+=qml_debug
|
||||
# "/usr/bin/make" qmake_all
|
||||
# make
|
||||
|
||||
# echo "============================================="
|
||||
# ./evernote-import-qt
|
||||
rsync -a /cygdrive/d/Web/www/joplin/QtClient/dependencies/dll-debug/ /cygdrive/d/Web/www/joplin/QtClient/build-evernote-import-qt-Visual_C_32_bites-Debug/debug
|
||||
cd -
|
Binary file not shown.
@ -25,3 +25,10 @@ DEFINES += QT_DEPRECATED_WARNINGS
|
||||
|
||||
HEADERS += \
|
||||
xmltomd.h
|
||||
|
||||
INCLUDEPATH += "C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/include"
|
||||
INCLUDEPATH += "C:/Program Files (x86)/Windows Kits/10/Include/10.0.10240.0/ucrt"
|
||||
|
||||
LIBS += -L"C:/Program Files (x86)/Microsoft Visual Studio 14.0/VC/lib"
|
||||
LIBS += -L"C:/Program Files (x86)/Windows Kits/8.1/Lib/winv6.3/um/x86"
|
||||
LIBS += -L"C:/Program Files (x86)/Windows Kits/10/Lib/10.0.10240.0/ucrt/x86"
|
||||
|
@ -8,21 +8,22 @@
|
||||
#include <QDir>
|
||||
#include <QSqlError>
|
||||
#include <QSqlRecord>
|
||||
#include <QCryptographicHash>
|
||||
#include <QTextCodec>
|
||||
|
||||
#include "xmltomd.h"
|
||||
|
||||
struct Resource {
|
||||
QString id;
|
||||
QString mime;
|
||||
QString filename;
|
||||
QByteArray data;
|
||||
time_t timestamp;
|
||||
struct EnMediaElement {
|
||||
QString hash;
|
||||
QString alt;
|
||||
};
|
||||
|
||||
Resource() : timestamp(0) {}
|
||||
struct ContentElements {
|
||||
QList<EnMediaElement> enMediaElements;
|
||||
};
|
||||
|
||||
struct Note {
|
||||
int id;
|
||||
QString id;
|
||||
QString title;
|
||||
QString content;
|
||||
time_t created;
|
||||
@ -39,11 +40,17 @@ struct Note {
|
||||
QString reminderTime;
|
||||
QString sourceApplication;
|
||||
QString applicationData;
|
||||
std::vector<Resource> resources;
|
||||
QList<EnMediaElement> enMediaElements;
|
||||
std::vector<xmltomd::Resource> resources;
|
||||
|
||||
Note() : created(0), updated(0) {}
|
||||
};
|
||||
|
||||
// Derives a 32-character hexadecimal identifier from `s`: the string is
// encoded as UTF-8, hashed with SHA-256, and the first 32 characters of the
// hex digest are returned. The result is fully determined by `s`, so callers
// that need distinct IDs must mix varying data into the input.
QString createUuid(const QString& s) {
	const QByteArray digest = QCryptographicHash::hash(s.toUtf8(), QCryptographicHash::Sha256);
	const QString hexDigest = QString(digest.toHex());
	return hexDigest.left(32);
}
|
||||
|
||||
time_t dateStringToTimestamp(const QString& s) {
|
||||
QDateTime d = QDateTime::fromString(s, "yyyyMMddThhmmssZ");
|
||||
d.setTimeSpec(Qt::UTC);
|
||||
@ -100,7 +107,7 @@ void parseAttributes(QXmlStreamReader& reader, Note& note) {
|
||||
// </resource-attributes>
|
||||
// </resource>
|
||||
|
||||
void parseResourceAttributes(QXmlStreamReader& reader, Resource& resource) {
|
||||
void parseResourceAttributes(QXmlStreamReader& reader, xmltomd::Resource& resource) {
|
||||
while (reader.readNextStartElement()) {
|
||||
if (reader.name() == "file-name") {
|
||||
resource.filename = reader.readElementText();
|
||||
@ -116,7 +123,7 @@ void parseResourceAttributes(QXmlStreamReader& reader, Resource& resource) {
|
||||
}
|
||||
}
|
||||
|
||||
void parseResourceRecognition(QXmlStreamReader& reader, Resource& resource) {
|
||||
void parseResourceRecognition(QXmlStreamReader& reader, xmltomd::Resource& resource) {
|
||||
QString recognitionXml = reader.readElementText();
|
||||
|
||||
QXmlStreamReader r(recognitionXml.toUtf8());
|
||||
@ -141,8 +148,8 @@ void parseResourceRecognition(QXmlStreamReader& reader, Resource& resource) {
|
||||
}
|
||||
}
|
||||
|
||||
Resource parseResource(QXmlStreamReader& reader) {
|
||||
Resource output;
|
||||
xmltomd::Resource parseResource(QXmlStreamReader& reader) {
|
||||
xmltomd::Resource output;
|
||||
while (reader.readNextStartElement()) {
|
||||
if (reader.name() == "data") {
|
||||
QString encoding = "";
|
||||
@ -154,7 +161,7 @@ Resource parseResource(QXmlStreamReader& reader) {
|
||||
}
|
||||
if (encoding != "base64") {
|
||||
qWarning() << "Unsupported <resource><data> encoding:" << encoding;
|
||||
return Resource();
|
||||
return xmltomd::Resource();
|
||||
}
|
||||
|
||||
output.data = QByteArray::fromBase64(reader.readElementText().toUtf8());
|
||||
@ -173,7 +180,37 @@ Resource parseResource(QXmlStreamReader& reader) {
|
||||
}
|
||||
}
|
||||
|
||||
//qDebug() << output.id << output.mime << output.filename << output.timestamp;
|
||||
if (!output.id.length()) {
|
||||
//output.id = createUuid(QString("%1%2%3%4").arg(output.filename).arg(output.timestamp).arg(QDateTime::currentMSecsSinceEpoch()).arg((int)qrand()));
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
// Scans the XML body of a note and collects every <en-media> start tag found
// after the root element, in document order, recording its "hash" and "alt"
// attributes (missing attributes are left as default-constructed strings).
//
// content: the XML text to scan.
// Returns the collected elements; the list is empty when the content has no
// readable root element, and may be partial when the XML becomes malformed
// part-way through (a warning is logged in both cases).
ContentElements parseContentElements(const QString& content) {
	ContentElements output;
	QXmlStreamReader reader(content.toUtf8());

	if (!reader.readNextStartElement()) {
		qWarning() << "Cannot parse XML:" << content;
		return output;
	}

	while (!reader.atEnd()) {
		reader.readNext();

		if (!reader.isStartElement()) continue;
		if (!(reader.name() == "en-media")) continue;

		EnMediaElement e;
		foreach (const QXmlStreamAttribute& attr, reader.attributes()) {
			const QString attrName = attr.name().toString();
			if (attrName == "hash") e.hash = attr.value().toString();
			if (attrName == "alt") e.alt = attr.value().toString();
		}
		output.enMediaElements << e;
	}

	// atEnd() also becomes true when the reader stops on an error, so without
	// this check a malformed document would silently yield a truncated list.
	if (reader.hasError()) {
		qWarning() << "XML error while looking for en-media elements:" << reader.errorString();
	}

	return output;
}
|
||||
@ -186,6 +223,8 @@ Note parseNote(QXmlStreamReader& reader) {
|
||||
note.title = reader.readElementText();
|
||||
} else if (reader.name() == "content") {
|
||||
note.content = reader.readElementText();
|
||||
ContentElements contentElements = parseContentElements(note.content);
|
||||
note.enMediaElements = contentElements.enMediaElements;
|
||||
} else if (reader.name() == "created") {
|
||||
note.created = dateStringToTimestamp(reader.readElementText());
|
||||
} else if (reader.name() == "updated") {
|
||||
@ -202,11 +241,59 @@ Note parseNote(QXmlStreamReader& reader) {
|
||||
}
|
||||
}
|
||||
|
||||
//qDebug() << title << created << updated;
|
||||
note.id = createUuid(QString("%1%2%3%4%5")
|
||||
.arg(note.title)
|
||||
.arg(note.content)
|
||||
.arg(note.created)
|
||||
.arg(QDateTime::currentMSecsSinceEpoch())
|
||||
.arg((qint64)qrand()));
|
||||
|
||||
//qDebug() << note.longitude << note.latitude << note.source << note.author << note.sourceUrl;
|
||||
// This is a bit of a hack. Notes sometimes have resources attached to them, but those <resource> tags don't contain
|
||||
// an "objID" tag, making it impossible to reference the resource. However, in this case the content of the note
|
||||
// will contain a corresponding <en-media/> tag, which has the ID in the "hash" attribute. All this information
|
||||
// has been collected above so we now set the resource ID to the hash attribute of the en-media tags. Here's an
|
||||
// example of note that shows this problem:
|
||||
|
||||
//qDebug() << note.sourceApplication << note.reminderOrder << note.reminderDoneTime;
|
||||
// <?xml version="1.0" encoding="UTF-8"?>
|
||||
// <!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
|
||||
// <en-export export-date="20161221T203133Z" application="Evernote/Windows" version="6.x">
|
||||
// <note>
|
||||
// <title>Commande Asda</title>
|
||||
// <content>
|
||||
// <![CDATA[
|
||||
// <?xml version="1.0" encoding="UTF-8"?>
|
||||
// <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
|
||||
// <en-note>
|
||||
// <en-media alt="your QR code" hash="216a16a1bbe007fba4ccf60b118b4ccc" type="image/png"></en-media></en-note>]]>
|
||||
// </content>
|
||||
// <created>20160921T203424Z</created>
|
||||
// <updated>20160921T203438Z</updated>
|
||||
// <note-attributes>
|
||||
// <reminder-order>20160902T140445Z</reminder-order>
|
||||
// <reminder-done-time>20160924T101120Z</reminder-done-time>
|
||||
// </note-attributes>
|
||||
// <resource>
|
||||
// <data encoding="base64">........</data>
|
||||
// <mime>image/png</mime>
|
||||
// <width>150</width>
|
||||
// <height>150</height>
|
||||
// </resource>
|
||||
// </note>
|
||||
// </en-export>
|
||||
|
||||
int mediaHashIndex = 0;
|
||||
for (int i = 0; i < note.resources.size(); i++) {
|
||||
xmltomd::Resource& r = note.resources[i];
|
||||
if (r.id == "") {
|
||||
if (note.enMediaElements.size() <= mediaHashIndex) {
|
||||
qWarning() << "Resource without an ID and hash did not appear in note content:" << note.id;
|
||||
} else {
|
||||
r.id = note.enMediaElements[mediaHashIndex].hash;
|
||||
r.alt = note.enMediaElements[mediaHashIndex].alt;
|
||||
mediaHashIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return note;
|
||||
}
|
||||
@ -220,6 +307,9 @@ std::vector<Note> parseXmlFile(const QString& filePath) {
|
||||
return output;
|
||||
}
|
||||
|
||||
QTextStream in(&file);
|
||||
in.setCodec("UTF-8");
|
||||
|
||||
QByteArray fileData = file.readAll();
|
||||
|
||||
QXmlStreamReader reader(fileData);
|
||||
@ -239,11 +329,25 @@ std::vector<Note> parseXmlFile(const QString& filePath) {
|
||||
return output;
|
||||
}
|
||||
|
||||
void filePutContents(const QString& filePath, const QString& content) {
|
||||
QFile file(filePath);
|
||||
if (file.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
|
||||
QTextStream stream(&file);
|
||||
stream << content;
|
||||
} else {
|
||||
qCritical() << "Cannot write to" << filePath;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
QCoreApplication a(argc, argv);
|
||||
|
||||
QString dbPath = "D:/Web/www/joplin/notes.sqlite";
|
||||
QTextCodec::setCodecForLocale(QTextCodec::codecForName("UTF-8"));
|
||||
|
||||
qsrand(QTime::currentTime().msec());
|
||||
|
||||
QString dbPath = "D:/Web/www/joplin/QtClient/data/notes.sqlite";
|
||||
QString resourceDir = "D:/Web/www/joplin/QtClient/data/resources";
|
||||
|
||||
QSqlDatabase db = QSqlDatabase::addDatabase("QSQLITE");
|
||||
db.setDatabaseName(dbPath);
|
||||
@ -258,19 +362,24 @@ int main(int argc, char *argv[]) {
|
||||
// TODO: REMOVE REMOVE REMOVE
|
||||
db.exec("DELETE FROM folders");
|
||||
db.exec("DELETE FROM notes");
|
||||
db.exec("DELETE FROM resources");
|
||||
db.exec("DELETE FROM note_resources");
|
||||
db.exec("DELETE FROM tags");
|
||||
// TODO: REMOVE REMOVE REMOVE
|
||||
|
||||
QDir dir("S:/Docs/Textes/Calendrier/EvernoteBackup/Enex20161219");
|
||||
dir.setFilter(QDir::Files | QDir::Hidden | QDir::NoSymLinks);
|
||||
QFileInfoList fileList = dir.entryInfoList();
|
||||
qDebug() << fileList.size();
|
||||
QMap<QString, QList<Note>> tagNotes;
|
||||
|
||||
for (int i = 0; i < fileList.size(); ++i) {
|
||||
QFileInfo fileInfo = fileList.at(i);
|
||||
|
||||
db.exec("BEGIN TRANSACTION");
|
||||
|
||||
QSqlQuery query(db);
|
||||
query.prepare("INSERT INTO folders (title, created_time, updated_time) VALUES (?, ?, ?)");
|
||||
query.prepare("INSERT INTO folders (id, title, created_time, updated_time) VALUES (?, ?, ?, ?)");
|
||||
query.addBindValue(createUuid(QString("%1%2%3%4").arg(fileInfo.baseName()).arg(fileInfo.created().toTime_t()).arg((int)qrand()).arg(QDateTime::currentMSecsSinceEpoch())));
|
||||
query.addBindValue(fileInfo.baseName());
|
||||
query.addBindValue(fileInfo.created().toTime_t());
|
||||
query.addBindValue(fileInfo.created().toTime_t());
|
||||
@ -280,13 +389,48 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
for (int noteIndex = 0; noteIndex < notes.size(); noteIndex++) {
|
||||
Note n = notes[noteIndex];
|
||||
for (int resourceIndex = 0; resourceIndex < n.resources.size(); resourceIndex++) {
|
||||
xmltomd::Resource resource = n.resources[resourceIndex];
|
||||
QSqlQuery query(db);
|
||||
query.prepare("INSERT INTO resources (id, title, mime, filename, created_time, updated_time) VALUES (?,?,?,?,?,?)");
|
||||
query.addBindValue(resource.id);
|
||||
query.addBindValue(resource.filename);
|
||||
query.addBindValue(resource.mime);
|
||||
query.addBindValue(resource.filename);
|
||||
query.addBindValue(resource.timestamp);
|
||||
query.addBindValue(resource.timestamp);
|
||||
query.exec();
|
||||
|
||||
query = QSqlQuery(db);
|
||||
query.prepare("INSERT INTO note_resources (resource_id, note_id) VALUES (?,?)");
|
||||
query.addBindValue(resource.id);
|
||||
query.addBindValue(n.id);
|
||||
query.exec();
|
||||
}
|
||||
}
|
||||
|
||||
for (int noteIndex = 0; noteIndex < notes.size(); noteIndex++) {
|
||||
Note n = notes[noteIndex];
|
||||
|
||||
// if (i != 8 || noteIndex != 3090) continue;
|
||||
|
||||
time_t reminderOrder = dateStringToTimestamp(n.reminderOrder);
|
||||
|
||||
QString markdown = xmltomd::evernoteXmlToMd(n.content);
|
||||
QString markdown = xmltomd::evernoteXmlToMd(n.content, n.resources);
|
||||
|
||||
QString html(n.content);
|
||||
html.replace("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "");
|
||||
html.replace("<!DOCTYPE en-note SYSTEM \"http://xml.evernote.com/pub/enml2.dtd\">", "");
|
||||
html = html.trimmed();
|
||||
|
||||
html = "<style>* { margin: 0; padding:0; }</style><div style=\"width: 100%\"><div style=\"float: left; width: 45%; font-family:monospace;\">" + html + "</div><div style=\"float: left; width: 45%;\"><pre style=\"white-space: pre-wrap;\">" + markdown + "</pre></div></div>";
|
||||
|
||||
QString generatedPath = "D:/Web/www/joplin/tests/generated";
|
||||
filePutContents(QString("%1/%2_%3.html").arg(generatedPath).arg(i).arg(noteIndex), html);
|
||||
|
||||
QSqlQuery query(db);
|
||||
query.prepare("INSERT INTO notes (title, body, created_time, updated_time, longitude, latitude, altitude, source, author, source_url, is_todo, todo_due, todo_completed, source_application, application_data, `order`) VALUES (:title,:body,:created_time,:updated_time,:longitude,:latitude,:altitude,:source,:author,:source_url,:is_todo,:todo_due,:todo_completed,:source_application,:application_data,:order)");
|
||||
query.prepare("INSERT INTO notes (id, title, body, created_time, updated_time, longitude, latitude, altitude, source, author, source_url, is_todo, todo_due, todo_completed, source_application, application_data, `order`) VALUES (:id, :title,:body,:created_time,:updated_time,:longitude,:latitude,:altitude,:source,:author,:source_url,:is_todo,:todo_due,:todo_completed,:source_application,:application_data,:order)");
|
||||
query.bindValue(":id", n.id);
|
||||
query.bindValue(":title", n.title);
|
||||
query.bindValue(":body", markdown);
|
||||
query.bindValue(":created_time", n.created);
|
||||
@ -305,6 +449,14 @@ int main(int argc, char *argv[]) {
|
||||
query.bindValue(":order", reminderOrder);
|
||||
query.exec();
|
||||
|
||||
for (int tagIndex = 0; tagIndex < n.tags.size(); tagIndex++) {
|
||||
QString tag = n.tags[tagIndex];
|
||||
if (!tagNotes.contains(tag)) {
|
||||
tagNotes[tag] = QList<Note>();
|
||||
}
|
||||
tagNotes[tag] << n;
|
||||
}
|
||||
|
||||
QSqlError error = query.lastError();
|
||||
if (error.isValid()) {
|
||||
qWarning() << "SQL error:" << error;
|
||||
@ -315,4 +467,29 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
db.exec("COMMIT");
|
||||
}
|
||||
|
||||
db.exec("BEGIN TRANSACTION");
|
||||
|
||||
for (QMap<QString, QList<Note>>::const_iterator it = tagNotes.begin(); it != tagNotes.end(); ++it) {
|
||||
QString tagId = createUuid(QString("%1%2%3").arg(it.key()).arg((int)qrand()).arg(QDateTime::currentMSecsSinceEpoch()));
|
||||
|
||||
QSqlQuery query(db);
|
||||
query.prepare("INSERT INTO tags (id, title, created_time, updated_time) VALUES (?,?,?,?)");
|
||||
query.addBindValue(tagId);
|
||||
query.addBindValue(it.key());
|
||||
query.addBindValue(QDateTime::currentDateTime().toTime_t());
|
||||
query.addBindValue(QDateTime::currentDateTime().toTime_t());
|
||||
query.exec();
|
||||
|
||||
for (int i = 0; i < it.value().size(); i++) {
|
||||
Note note = it.value()[i];
|
||||
QSqlQuery query(db);
|
||||
query.prepare("INSERT INTO note_tags (note_id, tag_id) VALUES (?,?)");
|
||||
query.addBindValue(note.id);
|
||||
query.addBindValue(tagId);
|
||||
query.exec();
|
||||
}
|
||||
}
|
||||
|
||||
db.exec("COMMIT");
|
||||
}
|
||||
|
@ -1,10 +1,12 @@
|
||||
#include <QDebug>
|
||||
#include <QTextCodec>
|
||||
|
||||
#include "xmltomd.h"
|
||||
|
||||
namespace xmltomd {
|
||||
|
||||
QMap<QString, QString> htmlEntities;
|
||||
QStringList imageMimeTypes;
|
||||
|
||||
QString htmlEntityDecode(const QString& htmlEntity) {
|
||||
if (!htmlEntities.size()) {
|
||||
@ -318,10 +320,37 @@ QMap<QString, QString> attributes(QXmlStreamReader& reader) {
|
||||
return output;
|
||||
}
|
||||
|
||||
// TODO: remove inner whitespaces (double/triple spaces, etc.)
|
||||
QString collapseWhiteSpace(QString text) {
|
||||
if (text.trimmed().length() == 0) return QString();
|
||||
bool isWhiteSpace(const QChar& c) {
|
||||
return c == '\n' || c == '\r' || c == '\v' || c == '\f' || c == '\t' || c == ' ';
|
||||
}
|
||||
|
||||
// Like QString::simpified(), except that it preserves non-breaking spaces (which
|
||||
// Evernote uses for identation, etc.)
|
||||
QString simplifyString(const QString& s) {
|
||||
QString output;
|
||||
bool previousWhite = false;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
QChar c = s[i];
|
||||
bool isWhite = isWhiteSpace(c);
|
||||
if (previousWhite && isWhite) {
|
||||
// skip
|
||||
} else {
|
||||
output += c;
|
||||
}
|
||||
previousWhite = isWhite;
|
||||
}
|
||||
|
||||
while (output.length() && isWhiteSpace(output[0])) output = output.right(output.length() - 1);
|
||||
while (output.length() && isWhiteSpace(output[output.length() - 1])) output = output.left(output.length() - 1);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
void collapseWhiteSpaceAndAppend(QStringList& lines, ParsingState& state, QString text) {
|
||||
if (state.inCode) {
|
||||
text = "\t" + text;
|
||||
lines.append(text);
|
||||
} else {
|
||||
// Remove all \n and \r from the left and right of the text
|
||||
while (text.length() && (text[0] == '\n' || text[0] == '\r')) text = text.right(text.length() - 1);
|
||||
while (text.length() && (text[text.length() - 1] == '\n' || text[text.length() - 1] == '\r')) text = text.left(text.length() - 1);
|
||||
@ -330,12 +359,14 @@ QString collapseWhiteSpace(QString text) {
|
||||
// also collapse them to just one space.
|
||||
bool spaceLeft = text.length() && text[0] == ' ';
|
||||
bool spaceRight = text.length() && text[text.size() - 1] == ' ';
|
||||
text = text.simplified();
|
||||
text = simplifyString(text);
|
||||
|
||||
if (spaceLeft) text = " " + text;
|
||||
if (spaceRight) text = text + " ";
|
||||
if (!spaceLeft && !spaceRight && text == "") return;
|
||||
|
||||
return text;
|
||||
if (spaceLeft) lines.append(SPACE);
|
||||
lines.append(text);
|
||||
if (spaceRight) lines.append(SPACE);
|
||||
}
|
||||
}
|
||||
|
||||
bool isNewLineBlock(const QString& s) {
|
||||
@ -393,6 +424,23 @@ QString processMdArrayNewLines(QStringList md) {
|
||||
md = temp;
|
||||
|
||||
|
||||
|
||||
// NEW!!!
|
||||
temp.clear();
|
||||
last = "";
|
||||
foreach (QString v, md) {
|
||||
if (last == NEWLINE && (v == NEWLINE_MERGED || v == BLOCK_OPEN)) {
|
||||
// Skip it
|
||||
} else {
|
||||
temp.push_back(v);
|
||||
}
|
||||
last = v;
|
||||
}
|
||||
md = temp;
|
||||
|
||||
|
||||
|
||||
|
||||
if (md.size() > 2) {
|
||||
if (md[md.size() - 2] == NEWLINE_MERGED && md[md.size() - 1] == NEWLINE) {
|
||||
md.pop_back();
|
||||
@ -400,12 +448,24 @@ QString processMdArrayNewLines(QStringList md) {
|
||||
}
|
||||
|
||||
QString output;
|
||||
QString previous;
|
||||
bool start = true;
|
||||
foreach (QString v, md) {
|
||||
QString add;
|
||||
if (v == BLOCK_CLOSE || v == BLOCK_OPEN || v == NEWLINE || v == NEWLINE_MERGED) {
|
||||
output += "\n";
|
||||
add = "\n";
|
||||
} else if (v == SPACE) {
|
||||
if (previous == SPACE || previous == "\n" || start) {
|
||||
continue; // skip
|
||||
} else {
|
||||
output += v;
|
||||
add = " ";
|
||||
}
|
||||
} else {
|
||||
add = v;
|
||||
}
|
||||
start = false;
|
||||
output += add;
|
||||
previous = add;
|
||||
}
|
||||
|
||||
if (!output.trimmed().length()) return QString();
|
||||
@ -413,8 +473,31 @@ QString processMdArrayNewLines(QStringList md) {
|
||||
return output;
|
||||
}
|
||||
|
||||
bool isImageMimeType(const QString& m) {
|
||||
if (!imageMimeTypes.size()) {
|
||||
imageMimeTypes << "image/cgm" << "image/fits" << "image/g3fax" << "image/gif" << "image/ief" << "image/jp2" << "image/jpeg" << "image/jpm" << "image/jpx" << "image/naplps" << "image/png" << "image/prs.btif" << "image/prs.pti" << "image/t38" << "image/tiff" << "image/tiff-fx" << "image/vnd.adobe.photoshop" << "image/vnd.cns.inf2" << "image/vnd.djvu" << "image/vnd.dwg" << "image/vnd.dxf" << "image/vnd.fastbidsheet" << "image/vnd.fpx" << "image/vnd.fst" << "image/vnd.fujixerox.edmics-mmr" << "image/vnd.fujixerox.edmics-rlc" << "image/vnd.globalgraphics.pgb" << "image/vnd.microsoft.icon" << "image/vnd.mix" << "image/vnd.ms-modi" << "image/vnd.net-fpx" << "image/vnd.sealed.png" << "image/vnd.sealedmedia.softseal.gif" << "image/vnd.sealedmedia.softseal.jpg" << "image/vnd.svf" << "image/vnd.wap.wbmp" << "image/vnd.xiff";
|
||||
}
|
||||
return imageMimeTypes.contains(m, Qt::CaseInsensitive);
|
||||
}
|
||||
|
||||
// Appends a Markdown reference to `resource` onto `lines` as three entries:
// the opening bracket, the label, and the "](:/<resource id>)" target.
// Resources whose MIME type is an image type get the image form ("!["),
// all others get the plain link form ("[").
//
// lines:    list the three entries are appended to.
// resource: supplies the MIME type, the ID and the fallback label.
// alt:      label to use; when empty, resource.alt is used instead.
void addResourceTag(QStringList& lines, Resource& resource, const QString& alt = "") {
	const QString label = alt.isEmpty() ? resource.alt : alt;
	const QString opener = isImageMimeType(resource.mime) ? "![" : "[";

	lines.append(opener);
	lines.append(label);
	lines.append(QString("](:/%1)").arg(resource.id));
}
|
||||
|
||||
void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingState& state) {
|
||||
// Attributes are rarely used in Evernote XML code, so they are only loaded as needed
|
||||
// by the tag using `attrs = attributes(reader);`
|
||||
QMap<QString, QString> attrs;
|
||||
std::vector<QMap<QString, QString>> attributesLIFO;
|
||||
|
||||
while (!reader.atEnd()) {
|
||||
reader.readNext();
|
||||
@ -422,18 +505,19 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
|
||||
QStringRef n = reader.name();
|
||||
|
||||
if (reader.isStartElement()) {
|
||||
attrs.clear();
|
||||
attributesLIFO.push_back(attributes(reader));
|
||||
|
||||
if (isBlockTag(n)) {
|
||||
lines.append(BLOCK_OPEN);
|
||||
evernoteXmlToMdArray(reader, lines, state);
|
||||
} else if (isStrongTag(n)) {
|
||||
lines.append("**");
|
||||
} else if (isAnchor(n)) {
|
||||
attrs = attributes(reader);
|
||||
lines.append("[");
|
||||
} else if (isEmTag(n)) {
|
||||
lines.append("*");
|
||||
} else if (n == "en-todo") {
|
||||
attrs = attributesLIFO.back();
|
||||
QString checked = attrs["checked"] == "true" ? "X" : " ";
|
||||
lines.append(QString("- [%1] ").arg(checked));
|
||||
} else if (isListTag(n)) {
|
||||
@ -470,7 +554,26 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
|
||||
} else if (n == "br") {
|
||||
lines.append(NEWLINE);
|
||||
} else if (n == "en-media") {
|
||||
// TODO
|
||||
attrs = attributesLIFO.back();
|
||||
QString hash = attrs["hash"];
|
||||
Resource resource;
|
||||
for (int i = 0; i < state.resources.size(); i++) {
|
||||
Resource r = state.resources[i];
|
||||
if (r.id == hash) {
|
||||
resource = r;
|
||||
state.resources.erase(state.resources.begin() + i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// select * from notes where body like "%](:/%";
|
||||
|
||||
// If the resource does not appear among the note's resources, it
|
||||
// means it's an attachment. It will be appended along with the
|
||||
// other remaining resources at the bottom of the markdown text.
|
||||
if (resource.id != "") {
|
||||
addResourceTag(lines, resource, attrs["alt"]);
|
||||
}
|
||||
} else if (n == "span" || n == "font") {
|
||||
// Ignore
|
||||
} else {
|
||||
@ -487,23 +590,22 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
|
||||
state.inCode = false;
|
||||
lines.append(BLOCK_CLOSE);
|
||||
} else if (isAnchor(n)) {
|
||||
attrs = attributesLIFO.back();
|
||||
QString href = attrs.contains("href") ? attrs["href"] : "";
|
||||
lines.append(QString("](%1)").arg(href));
|
||||
} else if (isListTag(n)) {
|
||||
lines.append(BLOCK_CLOSE);
|
||||
state.lists.pop_back();
|
||||
} else if (n == "en-media") {
|
||||
// TODO
|
||||
lines.append("[EN-MEDIA TODO]");
|
||||
// Skip
|
||||
} else if (isIgnoredEndTag(n)) {
|
||||
// Skip
|
||||
} else {
|
||||
qWarning() << "Unsupported end tag:" << n;
|
||||
}
|
||||
if (attributesLIFO.size()) attributesLIFO.pop_back();
|
||||
} else if (reader.isCharacters()) {
|
||||
QString text = state.inCode ? reader.text().toString() : collapseWhiteSpace(reader.text().toString());
|
||||
if (state.inCode) text = "\t" + text;
|
||||
if (text != "") lines.append(text);
|
||||
collapseWhiteSpaceAndAppend(lines, state, reader.text().toString());
|
||||
} else if (reader.isEndDocument()) {
|
||||
// Ignore
|
||||
} else if (reader.isEntityReference()) {
|
||||
@ -514,14 +616,24 @@ void evernoteXmlToMdArray(QXmlStreamReader& reader, QStringList& lines, ParsingS
|
||||
}
|
||||
}
|
||||
|
||||
QString evernoteXmlToMd(const QString& content) {
|
||||
QString evernoteXmlToMd(const QString& content, std::vector<Resource> resources) {
|
||||
QXmlStreamReader reader(content.toUtf8());
|
||||
|
||||
if (reader.readNextStartElement()) {
|
||||
QStringList mdLines;
|
||||
ParsingState parsingState;
|
||||
parsingState.inCode = false;
|
||||
parsingState.resources = resources;
|
||||
evernoteXmlToMdArray(reader, mdLines, parsingState);
|
||||
|
||||
bool firstAttachment = true;
|
||||
foreach (Resource r, parsingState.resources) {
|
||||
if (firstAttachment) mdLines.push_back(NEWLINE);
|
||||
mdLines.push_back(NEWLINE);
|
||||
addResourceTag(mdLines, r, r.filename);
|
||||
firstAttachment = false;
|
||||
}
|
||||
|
||||
return processMdArrayNewLines(mdLines);
|
||||
} else {
|
||||
qWarning() << "Cannot parse XML:" << content;
|
||||
|
@ -7,17 +7,31 @@
|
||||
|
||||
namespace xmltomd {
|
||||
|
||||
struct Resource {
|
||||
QString id;
|
||||
QString mime;
|
||||
QString filename;
|
||||
QString alt;
|
||||
QByteArray data;
|
||||
time_t timestamp;
|
||||
|
||||
Resource() : timestamp(0) {}
|
||||
};
|
||||
|
||||
const QString BLOCK_OPEN = "<div>";
|
||||
const QString BLOCK_CLOSE = "</div>";
|
||||
const QString NEWLINE = "<br/>";
|
||||
const QString NEWLINE_MERGED = "<merged/>";
|
||||
const QString SPACE = "<space/>";
|
||||
|
||||
struct ParsingState {
|
||||
std::vector<std::pair<QString, int>> lists;
|
||||
bool inCode;
|
||||
std::vector<Resource> resources;
|
||||
std::vector<Resource> attachments;
|
||||
};
|
||||
|
||||
QString evernoteXmlToMd(const QString &content);
|
||||
QString evernoteXmlToMd(const QString &content, std::vector<Resource> resources);
|
||||
|
||||
}
|
||||
|
||||
|
@ -7,5 +7,12 @@
|
||||
"var"
|
||||
]
|
||||
}
|
||||
],
|
||||
"build_systems":
|
||||
[
|
||||
{
|
||||
"name": "Build evernote-import",
|
||||
"shell_cmd": "D:\\Programmes\\cygwin\\bin\\bash.exe --login D:\\Web\\www\\joplin\\QtClient\\evernote-import\\build.sh"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user