1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-11 18:24:43 +02:00

Server: Optimize delta change query to prevent timeouts on large datasets

This commit is contained in:
Laurent Cozic 2024-04-13 14:42:11 +01:00
parent 0c640c5e77
commit 89dfbe3ec1
4 changed files with 45 additions and 3 deletions

View File

@ -31,6 +31,7 @@ describe('db', () => {
'20211030103016_item_owner_name_unique',
'20211111134329_storage_index',
'20220121172409_email_recipient_default',
'20240413141308_changes_optimization',
];
let startProcessing = false;

View File

@ -0,0 +1,19 @@
import { DbConnection, isPostgres } from '../db';
// CREATE INDEX CONCURRENTLY cannot run within a transaction
export const config = { transaction: false };
export const up = async (db: DbConnection) => {
if (isPostgres(db)) {
// This is to optimize the sub-query in ChangeModel::changesForUserQuery() which retrieves
// the item creations and deletions. We make it concurrent so that it doesn't lock this busy
// table while it's being created.
await db.raw('CREATE INDEX CONCURRENTLY IF NOT EXISTS changes_user_id_counter_index ON changes (user_id, counter)');
}
};
export const down = async (db: DbConnection) => {
if (isPostgres(db)) {
await db.raw('DROP INDEX changes_user_id_counter_index');
}
};

View File

@ -110,6 +110,22 @@ export default class ChangeModel extends BaseModel<Change> {
// UPDATE changes do not have the user_id set because they are specific
// to the item, not to a particular user.
// The extra complexity is due to the fact that items can be shared between users.
//
// - CREATE: When a user creates an item, a corresponding user_item is added to their
// collection. When that item is shared with another user, a user_item is also added to
// that user's collection, via ShareModel::updateSharedItems(). Each user has their own
// `change` object for the creation operations. For example if a user shares a note with
// two users, there will be a total of three `change` objects for that item, each
// associated with on of these users. See UserItemModel::addMulti()
//
// - DELETE: When an item is deleted, all corresponding user_items are deleted. Likewise,
// there's a `change` object per user. See UserItemModel::deleteBy()
//
// - UPDATE: Updates are different because only one `change` object will be created per
// change, even if the item is shared multiple times. This is why we need a different
// query for it. See ItemModel::save()
// This used to be just one query but it kept getting slower and slower
// as the `changes` table grew. So it is now split into two queries
// merged by a UNION ALL.
@ -144,13 +160,17 @@ export default class ChangeModel extends BaseModel<Change> {
if (!doCountQuery) subParams1.push(limit);
// The "+ 0" was added to prevent Postgres from scanning the `changes` table in `counter`
// order, which is an extremely slow query plan. With "+ 0" it went from 2 minutes to 6
// seconds for a particular query. https://dba.stackexchange.com/a/338597/37012
const subQuery2 = `
SELECT ${fieldsSql}
FROM "changes"
WHERE counter > ?
AND type = ?
AND item_id IN (SELECT item_id FROM user_items WHERE user_id = ?)
ORDER BY "counter" ASC
ORDER BY "counter" + 0 ASC
${doCountQuery ? '' : 'LIMIT ?'}
`;

View File

@ -78,9 +78,11 @@ export async function beforeAllDb(unitName: string, createDbOptions: CreateDbOpt
const tempDir = `${packageRootDir}/temp/test-${unitName}`;
await fs.mkdirp(tempDir);
// Uncomment the code below to run the test units with Postgres. Run this:
// To run the test units with Postgres. Run this:
//
// sudo docker compose -f docker-compose.db-dev.yml up
// docker compose -f docker-compose.db-dev.yml up
//
// JOPLIN_TESTS_SERVER_DB=pg yarn test
if (process.env.JOPLIN_TESTS_SERVER_DB === 'pg') {
await initConfig(Env.Dev, parseEnv({