2021-01-22 19:41:11 +02:00
import BaseModel , { ModelType } from '../BaseModel' ;
import { RevisionEntity } from '../services/database/types' ;
import BaseItem from './BaseItem' ;
2019-05-06 22:35:29 +02:00
const DiffMatchPatch = require ( 'diff-match-patch' ) ;
2022-05-26 16:57:44 +02:00
import * as ArrayUtils from '../ArrayUtils' ;
2021-05-13 18:57:37 +02:00
import JoplinError from '../JoplinError' ;
2019-05-06 22:35:29 +02:00
const { sprintf } = require ( 'sprintf-js' ) ;
// Single diff-match-patch instance used by Revision to create and apply text patches.
const dmp = new DiffMatchPatch ( ) ;
2022-03-03 15:20:29 +02:00
/**
 * Parsed form of an object patch, i.e. of the JSON string produced by
 * Revision.createObjectPatch() and consumed by Revision.applyObjectPatch().
 */
export interface ObjectPatch {
	/** Names of the properties that must be removed from the patched object. */
	deleted: string[];
	/** Properties to set on the patched object, keyed by property name. */
	new: Record<string, any>;
}
2021-01-22 19:41:11 +02:00
/**
 * Model for the `revisions` table.
 *
 * A revision stores three diffs against its parent revision: `title_diff` and
 * `body_diff` (text patches) and `metadata_diff` (an object patch). Text
 * patches exist in two formats:
 *
 * - legacy: the textual output of dmp.patch_toText(), which starts with "@@";
 * - new: the JSON-serialised output of dmp.patch_make(), which starts with
 *   "[{", or is "[]" (or empty) when there is no change.
 */
export default class Revision extends BaseItem {

	public static tableName() {
		return 'revisions';
	}

	public static modelType() {
		return BaseModel.TYPE_REVISION;
	}

	/**
	 * Creates a legacy-format text patch (output of dmp.patch_toText()) that
	 * turns `oldText` into `newText`.
	 */
	public static createTextPatchLegacy(oldText: string, newText: string): string {
		return dmp.patch_toText(dmp.patch_make(oldText, newText));
	}

	/**
	 * Creates a new-format text patch (JSON-serialised dmp patch list) that
	 * turns `oldText` into `newText`.
	 */
	public static createTextPatch(oldText: string, newText: string): string {
		// Note that, once parsed, the resulting object will not exactly be like
		// a dmp patch object. This is because the library overrides the
		// toString() prototype function of the dmp patch object, and uses it in
		// certain functions. For example, in patch_toText(). It means that when
		// calling patch_toText() with an object that has been JSON-stringified
		// and JSON-parsed, it will not work.
		//
		// This is mostly fine for our purpose. It's only a problem in
		// Revision.patchStats() because it was based on parsing the GNU diff
		// as returned by patch_toText().
		return JSON.stringify(dmp.patch_make(oldText, newText));
	}

	/**
	 * Applies a legacy-format patch to `text` and returns the patched text.
	 *
	 * @throws Error if dmp.patch_apply() returns no result.
	 */
	public static applyTextPatchLegacy(text: string, patch: string): string {
		// Keep the parsed patch in its own variable rather than overwriting
		// the string parameter with a value of a different type.
		const parsedPatch = dmp.patch_fromText(patch);
		const result = dmp.patch_apply(parsedPatch, text);
		if (!result || !result.length) throw new Error('Could not apply patch');
		return result[0];
	}

	/** Returns true if `patch` is a non-empty string that starts with "@@". */
	private static isLegacyPatch(patch: string): boolean {
		// "!!" so that an empty, null or undefined patch yields `false` and
		// not the falsy value itself.
		return !!patch && patch.indexOf('@@') === 0;
	}

	/**
	 * Returns true if `patch` is empty (an empty patch is considered to be in
	 * the new format), is exactly "[]", or starts with "[{".
	 */
	private static isNewPatch(patch: string): boolean {
		if (!patch) return true;
		return patch.indexOf('[{') === 0 || patch === '[]';
	}

	/**
	 * Applies a text patch, in either format, to `text` and returns the
	 * patched text.
	 *
	 * @throws Error if dmp.patch_apply() returns no result. JSON.parse() errors
	 * propagate when the patch is neither legacy nor valid JSON.
	 */
	public static applyTextPatch(text: string, patch: string): string {
		if (this.isLegacyPatch(patch)) return this.applyTextPatchLegacy(text, patch);

		// An empty patch should be '[]', but legacy data may be just "".
		// An empty string would make JSON.parse fail, so parsePatch() turns
		// any empty value into an empty patch list.
		const result = dmp.patch_apply(this.parsePatch(patch), text);
		if (!result || !result.length) throw new Error('Could not apply patch');
		return result[0];
	}

	/**
	 * Returns true if a text diff carries changes: either a legacy patch, or a
	 * non-empty new-format patch other than "[]". A value that is in neither
	 * format is considered to carry no change.
	 */
	private static textDiffHasChanges(diff: string): boolean {
		if (this.isLegacyPatch(diff)) return true;
		return !!diff && this.isNewPatch(diff) && diff !== '[]';
	}

	/**
	 * Returns true if the revision changes neither the title, the body, nor
	 * the metadata.
	 */
	public static isEmptyRevision(rev: RevisionEntity): boolean {
		if (this.textDiffHasChanges(rev.title_diff)) return false;
		if (this.textDiffHasChanges(rev.body_diff)) return false;

		// Sanitise before parsing, like applyObjectPatch() does, so that both
		// methods accept the same stored values.
		const md = (rev.metadata_diff ? JSON.parse(this.sanitizeObjectPatch(rev.metadata_diff)) : {}) || {};
		if (md.new && Object.keys(md.new).length) return false;
		if (md.deleted && Object.keys(md.deleted).length) return false;
		return true;
	}

	/**
	 * Creates the JSON-serialised ObjectPatch that turns `oldObject` into
	 * `newObject`. Values are compared with `===`, so only a shallow comparison
	 * is done. A missing `oldObject` or `newObject` is treated as `{}`.
	 */
	public static createObjectPatch(oldObject: any, newObject: any) {
		const before = oldObject || {};
		// Without this default, the `in` check below throws a TypeError when
		// newObject is null or undefined.
		const after = newObject || {};
		const hasOwn = (o: any, key: string) => Object.prototype.hasOwnProperty.call(o, key);

		const output: ObjectPatch = {
			new: {},
			deleted: [],
		};

		for (const k in after) {
			if (!hasOwn(after, k)) continue;
			if (before[k] === after[k]) continue;
			output.new[k] = after[k];
		}

		for (const k in before) {
			if (!hasOwn(before, k)) continue;
			if (!(k in after)) output.deleted.push(k);
		}

		return JSON.stringify(output);
	}

	// We need to sanitise the object patch because it seems some are broken and
	// may contain new lines: https://github.com/laurent22/joplin/issues/6209
	private static sanitizeObjectPatch(patch: string): string {
		return patch.replace(/[\n\r]/g, '');
	}

	/**
	 * Returns a shallow copy of `object` with the serialised ObjectPatch
	 * `patch` applied: properties under "new" are set, then the keys listed
	 * under "deleted" are removed. An empty patch, or a patch missing either
	 * key, is tolerated and leaves the corresponding part unchanged.
	 *
	 * @throws SyntaxError if `patch` is not empty and is not valid JSON.
	 */
	public static applyObjectPatch(object: any, patch: string) {
		const parsedPatch: Partial<ObjectPatch> = (patch ? JSON.parse(this.sanitizeObjectPatch(patch)) : {}) || {};
		const output = Object.assign({}, object);

		const newProps = parsedPatch.new || {};
		for (const k in newProps) {
			output[k] = newProps[k];
		}

		for (const k of parsedPatch.deleted || []) {
			delete output[k];
		}

		return output;
	}

	// Turn a new-style patch into an approximation of a GNU diff format.
	// Approximation, because the only goal is to put "+" or "-" before each
	// line, so that it can be processed by patchStats().
	private static newPatchToDiffFormat(patch: string): string {
		const changeList: string[] = [];
		const patchArray = this.parsePatch(patch);

		for (const patchItem of patchArray) {
			for (const d of patchItem.diffs) {
				// d[0] is the operation (negative: deletion, positive:
				// insertion, 0: unchanged) and d[1] the affected text.
				if (d[0] === 0) continue;

				// The "-" / "+" marker must be the very first character of the
				// line since patchStats() tests for it at index 0. New lines
				// are replaced so that each change stays on a single line.
				if (d[0] < 0) {
					changeList.push(`-${d[1].replace(/[\n\r]/g, ' ')}`);
				} else {
					changeList.push(`+${d[1].trim().replace(/[\n\r]/g, ' ')}`);
				}
			}
		}

		return changeList.join('\n');
	}

	/**
	 * Counts the characters added and removed by a text patch, in either
	 * format. For a new-format patch that cannot be converted, the error is
	 * logged and `{ added: 0, removed: 0 }` is returned.
	 *
	 * @throws Error('Not implemented') if `patch` is an object rather than a
	 * string.
	 */
	public static patchStats(patch: string) {
		// typeof null is 'object' too: a null diff is not a parsed patch, so
		// let it through and handle it as an empty patch below.
		if (patch !== null && typeof patch === 'object') throw new Error('Not implemented');

		if (this.isNewPatch(patch)) {
			try {
				patch = this.newPatchToDiffFormat(patch);
			} catch (error) {
				// Normally it should work but if it doesn't we don't want it to
				// crash the app since it's just presentational. But log an
				// error so that it can eventually be fixed.
				console.error('Could not generate diff:', error, patch);
				return { added: 0, removed: 0 };
			}
		}

		// "- 1" to leave out the leading "+" or "-" marker.
		const countChars = (diffLine: string) => {
			return unescape(diffLine).length - 1;
		};

		let added = 0;
		let removed = 0;

		for (const line of patch.split('\n')) {
			if (line.indexOf('-') === 0) {
				removed += countChars(line);
			} else if (line.indexOf('+') === 0) {
				added += countChars(line);
			}
		}

		return { added, removed };
	}

	/**
	 * Returns a summary of the title and body changes of a revision, such as
	 * "-3, +12". The removed part is left out when nothing was removed.
	 */
	public static revisionPatchStatsText(rev: RevisionEntity) {
		const titleStats = this.patchStats(rev.title_diff);
		const bodyStats = this.patchStats(rev.body_diff);
		const total = {
			added: titleStats.added + bodyStats.added,
			removed: titleStats.removed + bodyStats.removed,
		};

		const output: string[] = [];
		if (total.removed) output.push(`-${total.removed}`);
		output.push(`+${total.added}`);
		return output.join(', ');
	}

	/** Returns the number of revisions stored for the given item. */
	public static async countRevisions(itemType: ModelType, itemId: string) {
		const r = await this.db().selectOne('SELECT count(*) as total FROM revisions WHERE item_type = ? AND item_id = ?', [itemType, itemId]);
		return r ? r.total : 0;
	}

	/** Returns the revision of the given item with the highest item_updated_time. */
	public static latestRevision(itemType: ModelType, itemId: string) {
		return this.modelSelectOne('SELECT * FROM revisions WHERE item_type = ? AND item_id = ? ORDER BY item_updated_time DESC LIMIT 1', [itemType, itemId]);
	}

	/** Returns all the revisions of the given item, sorted by item_updated_time ASC. */
	public static allByType(itemType: ModelType, itemId: string) {
		return this.modelSelectAll('SELECT * FROM revisions WHERE item_type = ? AND item_id = ? ORDER BY item_updated_time ASC', [itemType, itemId]);
	}

	/**
	 * Returns, among `itemIds`, the IDs of the items of type `itemType` that
	 * have at least one revision.
	 */
	public static async itemsWithRevisions(itemType: ModelType, itemIds: string[]) {
		if (!itemIds.length) return [];

		// The IDs are inlined in the query, so write them as standard
		// single-quoted SQL string literals with any embedded quote doubled.
		const idList = itemIds.map(id => `'${String(id).replace(/'/g, '\'\'')}'`).join(',');
		const rows = await this.db().selectAll(`SELECT distinct item_id FROM revisions WHERE item_type = ? AND item_id IN (${idList})`, [itemType]);

		return rows.map((r: RevisionEntity) => r.item_id);
	}

	/**
	 * Returns, among `itemIds`, the de-duplicated IDs of the items of type
	 * `itemType` that have no revision.
	 */
	public static async itemsWithNoRevisions(itemType: ModelType, itemIds: string[]) {
		const withRevs = new Set(await this.itemsWithRevisions(itemType, itemIds));
		return ArrayUtils.unique(itemIds.filter(id => !withRevs.has(id)));
	}

	/**
	 * Returns `revs` with the revision whose id matches `revision.id` moved
	 * to the last position. The input array is not modified: a copy is
	 * returned when a move is needed, otherwise `revs` itself is returned.
	 *
	 * @throws Error if `revision` is not in `revs`.
	 */
	public static moveRevisionToTop(revision: RevisionEntity, revs: RevisionEntity[]) {
		let targetIndex = -1;
		for (let i = revs.length - 1; i >= 0; i--) {
			if (revs[i].id === revision.id) {
				targetIndex = i;
				break;
			}
		}

		if (targetIndex < 0) throw new Error(`Could not find revision: ${revision.id}`);

		if (targetIndex === revs.length - 1) return revs;

		const output = revs.slice();
		const [toTop] = output.splice(targetIndex, 1);
		output.push(toTop);
		return output;
	}

	/**
	 * Rebuilds the title, body and metadata of `revision` by applying, from
	 * oldest to newest, the diffs of its chain of parent revisions followed by
	 * its own diffs.
	 *
	 * Note: revs must be sorted by update_time ASC (as returned by allByType).
	 * When `revs` is not provided, the revisions of the same item up to
	 * `revision.item_updated_time` are loaded from the database.
	 *
	 * @throws JoplinError with code "revision_encrypted" if the target
	 * revision or one of its parents is encrypted.
	 */
	public static async mergeDiffs(revision: RevisionEntity, revs: RevisionEntity[] = null) {
		if (!('encryption_applied' in revision) || !!revision.encryption_applied) throw new JoplinError('Target revision is encrypted', 'revision_encrypted');

		if (!revs) {
			revs = await this.modelSelectAll('SELECT * FROM revisions WHERE item_type = ? AND item_id = ? AND item_updated_time <= ? ORDER BY item_updated_time ASC', [revision.item_type, revision.item_id, revision.item_updated_time]);
		} else {
			revs = revs.slice();
		}

		// Handle rare case where two revisions have been created at exactly the same millisecond
		// Also handle even rarer case where a rev and its parent have been created at the
		// same milliseconds. All code below expects target revision to be on top.
		revs = this.moveRevisionToTop(revision, revs);

		const output = {
			title: '',
			body: '',
			metadata: {},
		};

		// Build up the list of revisions that are parents of the target revision.
		const revIndexes = [revs.length - 1];
		let parentId = revision.parent_id;
		for (let i = revs.length - 2; i >= 0; i--) {
			const rev = revs[i];
			if (rev.id !== parentId) continue;
			parentId = rev.parent_id;
			revIndexes.push(i);
		}
		// The indexes were collected from the target back to its oldest
		// parent, while the diffs must be applied oldest first.
		revIndexes.reverse();

		for (const revIndex of revIndexes) {
			const rev = revs[revIndex];
			if (rev.encryption_applied) throw new JoplinError(sprintf('Revision "%s" is encrypted', rev.id), 'revision_encrypted');
			output.title = this.applyTextPatch(output.title, rev.title_diff);
			output.body = this.applyTextPatch(output.body, rev.body_diff);
			try {
				output.metadata = this.applyObjectPatch(output.metadata, rev.metadata_diff);
			} catch (error) {
				error.message = `Revision ${rev.id}: Could not apply patch: ${error.message}: ${rev.metadata_diff}`;
				throw error;
			}
		}

		return output;
	}

	/**
	 * Deletes the revisions whose item_updated_time is more than `ttl`
	 * milliseconds in the past. For an item that has an encrypted revision
	 * involved, the deletion is skipped and an info message is logged.
	 */
	public static async deleteOldRevisions(ttl: number) {
		// When deleting old revisions, we need to make sure that the oldest surviving revision
		// is a "merged" one (as opposed to a diff from a now deleted revision). So every time
		// we deleted a revision, we need to find if there's a corresponding surviving revision
		// and modify that revision into a "merged" one.

		const cutOffDate = Date.now() - ttl;
		const revisions = await this.modelSelectAll('SELECT * FROM revisions WHERE item_updated_time < ? ORDER BY item_updated_time DESC', [cutOffDate]);
		const doneItems: Record<string, boolean> = {};

		for (const rev of revisions) {
			const doneKey = `${rev.item_type}_${rev.item_id}`;
			if (doneItems[doneKey]) continue;

			const keptRev = await this.modelSelectOne('SELECT * FROM revisions WHERE item_updated_time >= ? AND item_type = ? AND item_id = ? ORDER BY item_updated_time ASC LIMIT 1', [cutOffDate, rev.item_type, rev.item_id]);

			try {
				// Items are processed per (item_type, item_id) pair, as done
				// for doneKey and keptRev, so the deletion is scoped the same way.
				const deleteQueryCondition = 'item_updated_time < ? AND item_type = ? AND item_id = ?';
				const deleteQueryParams = [cutOffDate, rev.item_type, rev.item_id];
				const deleteQuery = { sql: `DELETE FROM revisions WHERE ${deleteQueryCondition}`, params: deleteQueryParams };

				if (!keptRev) {
					const hasEncrypted = await this.modelSelectOne(`SELECT * FROM revisions WHERE encryption_applied = 1 AND ${deleteQueryCondition}`, deleteQueryParams);
					if (hasEncrypted) throw new JoplinError('One of the revision to be deleted is encrypted', 'revision_encrypted');
					await this.db().transactionExecBatch([deleteQuery]);
				} else {
					// Note: we don't need to check for encrypted rev here because
					// mergeDiff will already throw the revision_encrypted exception
					// if a rev is encrypted.
					const merged = await this.mergeDiffs(keptRev);

					const queries = [
						deleteQuery,
						{
							sql: 'UPDATE revisions SET title_diff = ?, body_diff = ?, metadata_diff = ? WHERE id = ?',
							params: [
								this.createTextPatch('', merged.title),
								this.createTextPatch('', merged.body),
								this.createObjectPatch({}, merged.metadata),
								keptRev.id,
							],
						},
					];

					await this.db().transactionExecBatch(queries);
				}
			} catch (error) {
				if (error.code === 'revision_encrypted') {
					this.logger().info(`Aborted deletion of old revisions for item "${rev.item_id}" (rev "${rev.id}") because one of the revisions is still encrypted`, error);
				} else {
					throw error;
				}
			}

			doneItems[doneKey] = true;
		}
	}

	/**
	 * Returns true if the latest revision of the given item has exactly
	 * `updatedTime` as its item_updated_time.
	 */
	public static async revisionExists(itemType: ModelType, itemId: string, updatedTime: number): Promise<boolean> {
		const existingRev = await Revision.latestRevision(itemType, itemId);
		// "!!" so that a missing revision yields `false` and not null/undefined.
		return !!existingRev && existingRev.item_updated_time === updatedTime;
	}

	/** Parses a new-format (JSON) text patch. An empty value yields an empty patch list. */
	private static parsePatch(patch: any): any[] {
		return patch ? JSON.parse(patch) : [];
	}

}