2021-11-19 18:22:27 +01:00
import { promises as fs , createWriteStream } from 'fs' ;
import path from 'path' ;
2021-11-19 18:13:45 +01:00
import { inject , injectable } from 'inversify' ;
import { TYPES } from '../types.js' ;
import Config from './config.js' ;
2021-12-03 05:01:35 +01:00
import PQueue from 'p-queue' ;
2021-12-03 07:26:36 +01:00
import debug from '../utils/debug.js' ;
2022-01-05 21:30:32 +01:00
import { prisma } from '../utils/db.js' ;
import { FileCache } from '@prisma/client' ;
2021-11-19 18:13:45 +01:00
/**
 * Disk-backed file cache whose index is kept in the `fileCache` database table.
 *
 * Finished files are stored in `CACHE_DIR` under their hash. Files that are
 * still being written live in `CACHE_DIR/tmp` and are moved into place once
 * their write stream has closed.
 */
@injectable()
export default class FileCacheProvider {
  // Shared by every instance; a concurrency of 1 keeps eviction passes from overlapping.
  private static readonly evictionQueue = new PQueue({ concurrency: 1 });

  private readonly config: Config;

  constructor(@inject(TYPES.Config) config: Config) {
    this.config = config;
  }

  /**
   * Returns path to cached file if it exists, otherwise returns null.
   * Updates the `accessedAt` property of the cached file.
   *
   * If the database knows the hash but the file is missing from disk, the
   * stale database entry is deleted and the lookup is reported as a miss.
   * @param hash lookup key
   * @returns the path of the cached file, or null on a cache miss
   */
  async getPathFor(hash: string): Promise<string | null> {
    const model = await prisma.fileCache.findUnique({ where: { hash } });

    if (!model) {
      return null;
    }

    const resolvedPath = path.join(this.config.CACHE_DIR, hash);

    try {
      await fs.access(resolvedPath);
    } catch {
      // The index is out of sync with the disk: drop the stale entry.
      await prisma.fileCache.delete({ where: { hash } });

      return null;
    }

    await prisma.fileCache.update({
      where: { hash },
      data: { accessedAt: new Date() },
    });

    return resolvedPath;
  }

  /**
   * Returns a write stream for the given hash key.
   * The stream handles saving a new file and will
   * update the database after the stream is closed.
   *
   * Data is written to `CACHE_DIR/tmp/<hash>` first, so a partially written
   * file is never visible under its final name.
   * @param hash lookup key
   * @returns a writable stream for the temporary file
   */
  createWriteStream(hash: string) {
    const tmpPath = path.join(this.config.CACHE_DIR, 'tmp', hash);
    const finalPath = path.join(this.config.CACHE_DIR, hash);

    const stream = createWriteStream(tmpPath);

    stream.on('close', () => {
      // The helper handles its own errors, so discarding the promise cannot
      // produce an unhandled rejection.
      void this.finalizeWrite(hash, tmpPath, finalPath);
    });

    return stream;
  }

  /**
   * Deletes orphaned cache files and evicts files if
   * necessary. Should be run on program startup so files
   * will be evicted if the cache limit has changed.
   *
   * Rejects if removing orphans or the eviction pass fails.
   */
  async cleanup(): Promise<void> {
    await this.removeOrphans();
    await this.evictOldestIfNecessary();
  }

  /**
   * Moves a completely written temporary file to its final location and
   * records it in the database, then triggers an eviction pass.
   *
   * Failures are logged rather than thrown: this runs from a stream event
   * listener, where nobody could handle a rejection.
   * @param hash lookup key of the file that was written
   * @param tmpPath location the stream wrote to
   * @param finalPath location the file is served from once cached
   */
  private async finalizeWrite(hash: string, tmpPath: string, finalPath: string): Promise<void> {
    try {
      // Only move if size is non-zero (may have errored out)
      const stats = await fs.stat(tmpPath);

      if (stats.size !== 0) {
        await fs.rename(tmpPath, finalPath);

        // Upsert instead of create: an entry for this hash may already exist
        // (e.g. the same file was written twice), which must not fail here
        // after the file has already been moved into place.
        await prisma.fileCache.upsert({
          where: { hash },
          create: {
            hash,
            accessedAt: new Date(),
            bytes: stats.size,
          },
          update: {
            accessedAt: new Date(),
            bytes: stats.size,
          },
        });
      }

      await this.evictOldestIfNecessary();
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      debug(`Failed to finalize cache entry ${hash}: ${reason}`);
    }
  }

  /**
   * Queues an eviction pass and resolves once that pass has finished.
   * Rejects with the pass's error if it fails.
   */
  private async evictOldestIfNecessary(): Promise<void> {
    // Await the task's own promise: the queue's onEmpty() only signals that
    // nothing is waiting to start, not that the running pass has completed.
    await FileCacheProvider.evictionQueue.add(async () => this.evictOldest());
  }

  /**
   * Deletes the least recently accessed entries (database row first, then
   * the file) until the total size recorded in the database is within
   * `CACHE_LIMIT_IN_BYTES`.
   */
  private async evictOldest(): Promise<void> {
    debug('Evicting oldest files...');

    let totalSizeBytes = await this.getDiskUsageInBytes();
    let numOfEvictedFiles = 0;

    // Continue to evict until we're under the limit
    /* eslint-disable no-await-in-loop */
    while (totalSizeBytes > this.config.CACHE_LIMIT_IN_BYTES) {
      const oldest = await prisma.fileCache.findFirst({
        orderBy: { accessedAt: 'asc' },
      });

      if (!oldest) {
        // Nothing left to evict; without this the loop could never terminate.
        break;
      }

      await prisma.fileCache.delete({ where: { hash: oldest.hash } });

      // `force` ignores a file that is already gone, so one missing file
      // does not abort the whole eviction pass.
      await fs.rm(path.join(this.config.CACHE_DIR, oldest.hash), { force: true });

      debug(`${oldest.hash} has been evicted`);
      numOfEvictedFiles++;

      totalSizeBytes = await this.getDiskUsageInBytes();
    }
    /* eslint-enable no-await-in-loop */

    if (numOfEvictedFiles > 0) {
      debug(`${numOfEvictedFiles} files have been evicted`);
    } else {
      debug(`No files needed to be evicted. Total size of the cache is currently ${totalSizeBytes} bytes, and the cache limit is ${this.config.CACHE_LIMIT_IN_BYTES} bytes.`);
    }
  }

  /**
   * Reconciles the cache directory with the database in both directions:
   * files without a database entry are deleted from disk, and database
   * entries without a file are deleted from the database.
   */
  private async removeOrphans(): Promise<void> {
    // Check filesystem direction (do files exist on the disk but not in the database?)
    for await (const dirent of await fs.opendir(this.config.CACHE_DIR)) {
      // Only regular files are considered; directories are skipped.
      if (!dirent.isFile()) {
        continue;
      }

      const model = await prisma.fileCache.findUnique({
        where: { hash: dirent.name },
      });

      if (!model) {
        debug(`${dirent.name} was present on disk but was not in the database. Removing from disk.`);
        await fs.unlink(path.join(this.config.CACHE_DIR, dirent.name));
      }
    }

    // Check database direction (do entries exist in the database but not on the disk?)
    for await (const model of this.getFindAllIterable()) {
      const filePath = path.join(this.config.CACHE_DIR, model.hash);

      try {
        await fs.access(filePath);
      } catch {
        debug(`${model.hash} was present in database but was not on disk. Removing from database.`);
        await prisma.fileCache.delete({ where: { hash: model.hash } });
      }
    }
  }

  /**
   * Pulls from the database rather than the filesystem,
   * so may be slightly inaccurate.
   * @returns the total size of the cache in bytes
   */
  private async getDiskUsageInBytes(): Promise<number> {
    const data = await prisma.fileCache.aggregate({
      _sum: { bytes: true },
    });

    // The sum is null when the table is empty.
    return data._sum.bytes ?? 0;
  }

  /**
   * An efficient way to iterate over all rows: they are fetched in batches
   * of 50 instead of being loaded all at once.
   *
   * Paging is keyed on the unique `hash` column, so rows are never skipped
   * or repeated, and the caller may delete rows it has already received
   * while iterating.
   * @returns an async iterable over every row of the `fileCache` table
   */
  private async *getFindAllIterable(): AsyncGenerator<FileCache, void, undefined> {
    const limit = 50;
    let lastHash: string | undefined;
    let batch: FileCache[];

    /* eslint-disable no-await-in-loop */
    do {
      batch = await prisma.fileCache.findMany({
        where: lastHash === undefined ? undefined : { hash: { gt: lastHash } },
        orderBy: { hash: 'asc' },
        take: limit,
      });

      for (const model of batch) {
        // Remember the position before yielding; the consumer may delete the row.
        lastHash = model.hash;
        yield model;
      }
    } while (batch.length === limit); // A short batch means the table is exhausted.
    /* eslint-enable no-await-in-loop */
  }
}