muse/src/services/file-cache.ts

257 lines
6.6 KiB
TypeScript
Raw Normal View History

2021-11-19 18:22:27 +01:00
import {promises as fs, createWriteStream} from 'fs';
import path from 'path';
import {inject, injectable} from 'inversify';
import {TYPES} from '../types.js';
import Config from './config.js';
import PQueue from 'p-queue';
2021-12-03 07:26:36 +01:00
import debug from '../utils/debug.js';
2022-01-05 21:30:32 +01:00
import {prisma} from '../utils/db.js';
import {FileCache} from '@prisma/client';
/**
 * Disk-backed cache of files keyed by hash.
 *
 * File contents live in `config.CACHE_DIR` (one file per hash, named after the
 * hash) and bookkeeping (size, last access time) lives in the `fileCache`
 * database table. Files are evicted in least-recently-accessed order whenever
 * the recorded total size exceeds `config.CACHE_LIMIT_IN_BYTES`.
 */
@injectable()
export default class FileCacheProvider {
  /**
   * Shared by all instances and limited to a single concurrent task so that
   * two eviction passes never run at the same time.
   */
  private static readonly evictionQueue = new PQueue({concurrency: 1});

  private readonly config: Config;

  constructor(@inject(TYPES.Config) config: Config) {
    this.config = config;
  }

  /**
   * Returns path to cached file if it exists, otherwise throws an error.
   * Updates the `accessedAt` property of the cached file.
   * @param hash lookup key
   * @returns path of the cached file inside the cache directory
   * @throws Error('File is not cached') if there is no database entry for the
   * hash, or if the entry exists but the file is not accessible on disk (the
   * stale entry is deleted in that case)
   */
  async getPathFor(hash: string): Promise<string> {
    const model = await prisma.fileCache.findUnique({
      where: {
        hash,
      },
    });

    if (!model) {
      throw new Error('File is not cached');
    }

    const resolvedPath = path.join(this.config.CACHE_DIR, hash);

    try {
      await fs.access(resolvedPath);
    } catch {
      // The database knows about the file but the disk does not: drop the stale row
      await prisma.fileCache.delete({
        where: {
          hash,
        },
      });

      throw new Error('File is not cached');
    }

    await prisma.fileCache.update({
      where: {
        hash,
      },
      data: {
        accessedAt: new Date(),
      },
    });

    return resolvedPath;
  }

  /**
   * Returns a write stream for the given hash key.
   * The stream writes to a temporary file; once the stream is closed the file
   * is moved into the cache directory, recorded in the database, and an
   * eviction pass is run.
   * @param hash lookup key
   * @returns a writable stream targeting `<CACHE_DIR>/tmp/<hash>`
   */
  createWriteStream(hash: string) {
    const tmpPath = path.join(this.config.CACHE_DIR, 'tmp', hash);
    const finalPath = path.join(this.config.CACHE_DIR, hash);

    const stream = createWriteStream(tmpPath);

    // Nothing awaits this handler, so every failure must be caught here;
    // otherwise it would surface as an unhandled promise rejection.
    stream.on('close', async () => {
      try {
        // Stat may fail too (e.g. the temporary file was never created)
        const stats = await fs.stat(tmpPath);

        // Only move if size is non-zero (may have errored out)
        if (stats.size !== 0) {
          await fs.rename(tmpPath, finalPath);

          await prisma.fileCache.create({
            data: {
              hash,
              accessedAt: new Date(),
              bytes: stats.size,
            },
          });
        }
      } catch (error: unknown) {
        debug('Errored when moving a finished cache file:', error);
      }

      try {
        await this.evictOldestIfNecessary();
      } catch (error: unknown) {
        debug('Errored when evicting old cache files:', error);
      }
    });

    return stream;
  }

  /**
   * Deletes orphaned cache files and evicts files if
   * necessary. Should be run on program startup so files
   * will be evicted if the cache limit has changed.
   */
  async cleanup() {
    await this.removeOrphans();
    await this.evictOldestIfNecessary();
  }

  /**
   * Schedules an eviction pass on the shared queue and resolves once that
   * pass has finished (rejecting if it failed).
   */
  private async evictOldestIfNecessary(): Promise<void> {
    // Await the task's own promise rather than `onEmpty()`: `onEmpty()` only
    // signals that nothing is waiting in the queue, not that running work is done.
    await FileCacheProvider.evictionQueue.add(async () => this.evictOldest());
  }

  /**
   * Removes least-recently-accessed files, one at a time, until the total
   * size recorded in the database is within `CACHE_LIMIT_IN_BYTES`.
   */
  private async evictOldest(): Promise<void> {
    debug('Evicting oldest files...');

    let totalSizeBytes = await this.getDiskUsageInBytes();
    let numOfEvictedFiles = 0;

    // Continue to evict until we're under the limit
    /* eslint-disable no-await-in-loop */
    while (totalSizeBytes > this.config.CACHE_LIMIT_IN_BYTES) {
      const oldest = await prisma.fileCache.findFirst({
        orderBy: {
          accessedAt: 'asc',
        },
      });

      if (!oldest) {
        // Nothing left to evict; without this the loop could never terminate
        break;
      }

      await prisma.fileCache.delete({
        where: {
          hash: oldest.hash,
        },
      });

      try {
        await fs.unlink(path.join(this.config.CACHE_DIR, oldest.hash));
      } catch (error: unknown) {
        // A file that is already gone is as good as evicted; anything else is a real failure
        if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
          throw error;
        }
      }

      debug(`${oldest.hash} has been evicted`);
      numOfEvictedFiles++;

      totalSizeBytes = await this.getDiskUsageInBytes();
    }
    /* eslint-enable no-await-in-loop */

    if (numOfEvictedFiles > 0) {
      debug(`${numOfEvictedFiles} files have been evicted`);
    } else {
      debug(`No files needed to be evicted. Total size of the cache is currently ${totalSizeBytes} bytes, and the cache limit is ${this.config.CACHE_LIMIT_IN_BYTES} bytes.`);
    }
  }

  /**
   * Reconciles the cache directory with the database in both directions:
   * files without a database entry are deleted from disk, and database
   * entries without a file are deleted from the database.
   */
  private async removeOrphans() {
    // Check filesystem direction (do files exist on the disk but not in the database?)
    for await (const dirent of await fs.opendir(this.config.CACHE_DIR)) {
      // Only regular files are considered; subdirectories are left alone
      if (dirent.isFile()) {
        const model = await prisma.fileCache.findUnique({
          where: {
            hash: dirent.name,
          },
        });

        if (!model) {
          debug(`${dirent.name} was present on disk but was not in the database. Removing from disk.`);
          await fs.unlink(path.join(this.config.CACHE_DIR, dirent.name));
        }
      }
    }

    // Check database direction (do entries exist in the database but not on the disk?)
    for await (const model of this.getFindAllIterable()) {
      const filePath = path.join(this.config.CACHE_DIR, model.hash);

      try {
        await fs.access(filePath);
      } catch {
        debug(`${model.hash} was present in database but was not on disk. Removing from database.`);

        await prisma.fileCache.delete({
          where: {
            hash: model.hash,
          },
        });
      }
    }
  }

  /**
   * Pulls from the database rather than the filesystem,
   * so may be slightly inaccurate.
   * @returns the total size of the cache in bytes
   */
  private async getDiskUsageInBytes(): Promise<number> {
    const data = await prisma.fileCache.aggregate({
      _sum: {
        bytes: true,
      },
    });

    // The sum is null when there is nothing to add up
    return data._sum.bytes ?? 0;
  }

  /**
   * Lazily iterates over every row of the file cache table in batches.
   *
   * Pages by the unique `hash` column (keyset pagination) so that no row is
   * skipped at a batch boundary and so that deleting already-yielded rows
   * while iterating is safe.
   * @returns an async iterable over all `FileCache` rows, ordered by hash
   */
  private async * getFindAllIterable(): AsyncGenerator<FileCache, void, undefined> {
    const limit = 50;
    let previousHash: string | undefined;
    let batch: FileCache[];

    /* eslint-disable no-await-in-loop */
    do {
      batch = await prisma.fileCache.findMany({
        // First batch has no lower bound; later batches resume after the last hash seen
        where: previousHash === undefined ? undefined : {hash: {gt: previousHash}},
        orderBy: {
          hash: 'asc',
        },
        take: limit,
      });

      for (const model of batch) {
        previousHash = model.hash;
        yield model;
      }
      // A short batch means the table has been exhausted
    } while (batch.length === limit);
    /* eslint-enable no-await-in-loop */
  }
}