From 89dfbe3ec19afd3edc6dae3a3a3078cd988de225 Mon Sep 17 00:00:00 2001 From: Laurent Cozic Date: Sat, 13 Apr 2024 14:42:11 +0100 Subject: [PATCH] Server: Optimize delta change query to prevent timeouts on large datasets --- packages/server/src/db.test.ts | 1 + .../20240413141308_changes_optimization.ts | 19 ++++++++++++++++ packages/server/src/models/ChangeModel.ts | 22 ++++++++++++++++++- .../server/src/utils/testing/testUtils.ts | 6 +++-- 4 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 packages/server/src/migrations/20240413141308_changes_optimization.ts diff --git a/packages/server/src/db.test.ts b/packages/server/src/db.test.ts index 7d01e2098..b11ef2cc9 100644 --- a/packages/server/src/db.test.ts +++ b/packages/server/src/db.test.ts @@ -31,6 +31,7 @@ describe('db', () => { '20211030103016_item_owner_name_unique', '20211111134329_storage_index', '20220121172409_email_recipient_default', + '20240413141308_changes_optimization', ]; let startProcessing = false; diff --git a/packages/server/src/migrations/20240413141308_changes_optimization.ts b/packages/server/src/migrations/20240413141308_changes_optimization.ts new file mode 100644 index 000000000..4584abcc7 --- /dev/null +++ b/packages/server/src/migrations/20240413141308_changes_optimization.ts @@ -0,0 +1,19 @@ +import { DbConnection, isPostgres } from '../db'; + +// CREATE INDEX CONCURRENTLY cannot run within a transaction +export const config = { transaction: false }; + +export const up = async (db: DbConnection) => { + if (isPostgres(db)) { + // This is to optimize the sub-query in ChangeModel::changesForUserQuery() which retrieves + // the item creations and deletions. We make it concurrent so that it doesn't lock this busy + // table while it's being created. + await db.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS changes_user_id_counter_index ON changes (user_id, counter)'); + } +}; + +export const down = async (db: DbConnection) => { + if (isPostgres(db)) { + await db.raw('DROP INDEX changes_user_id_counter_index'); + } +}; diff --git a/packages/server/src/models/ChangeModel.ts b/packages/server/src/models/ChangeModel.ts index a56dd1208..58e5aa5f1 100644 --- a/packages/server/src/models/ChangeModel.ts +++ b/packages/server/src/models/ChangeModel.ts @@ -110,6 +110,22 @@ export default class ChangeModel extends BaseModel { // UPDATE changes do not have the user_id set because they are specific // to the item, not to a particular user. + // The extra complexity is due to the fact that items can be shared between users. + // + // - CREATE: When a user creates an item, a corresponding user_item is added to their + // collection. When that item is shared with another user, a user_item is also added to + // that user's collection, via ShareModel::updateSharedItems(). Each user has their own + // `change` object for the creation operations. For example if a user shares a note with + // two users, there will be a total of three `change` objects for that item, each + // associated with on of these users. See UserItemModel::addMulti() + // + // - DELETE: When an item is deleted, all corresponding user_items are deleted. Likewise, + // there's a `change` object per user. See UserItemModel::deleteBy() + // + // - UPDATE: Updates are different because only one `change` object will be created per + // change, even if the item is shared multiple times. This is why we need a different + // query for it. See ItemModel::save() + // This used to be just one query but it kept getting slower and slower // as the `changes` table grew. So it is now split into two queries // merged by a UNION ALL. @@ -144,13 +160,17 @@ export default class ChangeModel extends BaseModel { if (!doCountQuery) subParams1.push(limit); + // The "+ 0" was added to prevent Postgres from scanning the `changes` table in `counter` + // order, which is an extremely slow query plan. With "+ 0" it went from 2 minutes to 6 + // seconds for a particular query. https://dba.stackexchange.com/a/338597/37012 + const subQuery2 = ` SELECT ${fieldsSql} FROM "changes" WHERE counter > ? AND type = ? AND item_id IN (SELECT item_id FROM user_items WHERE user_id = ?) - ORDER BY "counter" ASC + ORDER BY "counter" + 0 ASC ${doCountQuery ? '' : 'LIMIT ?'} `; diff --git a/packages/server/src/utils/testing/testUtils.ts b/packages/server/src/utils/testing/testUtils.ts index f325b7ad8..beacdadce 100644 --- a/packages/server/src/utils/testing/testUtils.ts +++ b/packages/server/src/utils/testing/testUtils.ts @@ -78,9 +78,11 @@ export async function beforeAllDb(unitName: string, createDbOptions: CreateDbOpt const tempDir = `${packageRootDir}/temp/test-${unitName}`; await fs.mkdirp(tempDir); - // Uncomment the code below to run the test units with Postgres. Run this: + // To run the test units with Postgres. Run this: // - // sudo docker compose -f docker-compose.db-dev.yml up + // docker compose -f docker-compose.db-dev.yml up + // + // JOPLIN_TESTS_SERVER_DB=pg yarn test if (process.env.JOPLIN_TESTS_SERVER_DB === 'pg') { await initConfig(Env.Dev, parseEnv({