mirror of
https://github.com/Cog-Creators/Red-DiscordBot.git
synced 2025-11-06 03:08:55 -05:00
JsonIO atomic write improvements (#2132)
Use `async with Lock` instead of deprecated `with await lock` usage. Forces a file fsync prior and a directory fsync (where available) after rename to prevent issues with left behind temp files. Also should clarify: this is not threadsafe. Comments were clarified, function names remain misleading.
This commit is contained in:
parent
404800c556
commit
84ac5f3952
@ -5,8 +5,8 @@ import asyncio
|
||||
import logging
|
||||
from uuid import uuid4
|
||||
|
||||
# This is basically our old DataIO, except that it's now threadsafe
|
||||
# and just a base for much more elaborate classes
|
||||
# This is basically our old DataIO and just a base for much more elaborate classes
|
||||
# This still isn't completely threadsafe, (do not use config in threads)
|
||||
from pathlib import Path
|
||||
|
||||
log = logging.getLogger("red")
|
||||
@ -27,18 +27,50 @@ class JsonIO:
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
def _save_json(self, data, settings=PRETTY):
|
||||
"""
|
||||
This fsync stuff here is entirely neccessary.
|
||||
|
||||
On windows, it is not available in entirety.
|
||||
If a windows user ends up with tons of temp files, they should consider hosting on
|
||||
something POSIX compatible, or using the mongo backend instead.
|
||||
|
||||
Most users wont encounter this issue, but with high write volumes,
|
||||
without the fsync on both the temp file, and after the replace on the directory,
|
||||
There's no real durability or atomicity guarantee from the filesystem.
|
||||
|
||||
In depth overview of underlying reasons why this is needed:
|
||||
https://lwn.net/Articles/457667/
|
||||
|
||||
Also see:
|
||||
http://man7.org/linux/man-pages/man2/open.2.html#NOTES (synchronous I/O section)
|
||||
And:
|
||||
https://www.mjmwired.net/kernel/Documentation/filesystems/ext4.txt#310
|
||||
"""
|
||||
log.debug("Saving file {}".format(self.path))
|
||||
filename = self.path.stem
|
||||
tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0])
|
||||
tmp_path = self.path.parent / tmp_file
|
||||
with tmp_path.open(encoding="utf-8", mode="w") as f:
|
||||
json.dump(data, f, **settings)
|
||||
f.flush() # This does get closed on context exit, ...
|
||||
os.fsync(f.fileno()) # but that needs to happen prior to this line
|
||||
|
||||
tmp_path.replace(self.path)
|
||||
|
||||
# pylint: disable=E1101
|
||||
try:
|
||||
fd = os.open(self.path.parent, os.O_DIRECTORY)
|
||||
os.fsync(fd)
|
||||
except AttributeError:
|
||||
fd = None
|
||||
finally:
|
||||
if fd is not None:
|
||||
os.close(fd)
|
||||
|
||||
async def _threadsafe_save_json(self, data, settings=PRETTY):
|
||||
loop = asyncio.get_event_loop()
|
||||
func = functools.partial(self._save_json, data, settings)
|
||||
with await self._lock:
|
||||
async with self._lock:
|
||||
await loop.run_in_executor(None, func)
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
@ -51,6 +83,5 @@ class JsonIO:
|
||||
async def _threadsafe_load_json(self, path):
|
||||
loop = asyncio.get_event_loop()
|
||||
func = functools.partial(self._load_json, path)
|
||||
task = loop.run_in_executor(None, func)
|
||||
with await self._lock:
|
||||
return await asyncio.wait_for(task)
|
||||
async with self._lock:
|
||||
return await loop.run_in_executor(None, func)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user