mirror of
https://github.com/Cog-Creators/Red-DiscordBot.git
synced 2025-11-06 11:18:54 -05:00
JsonIO atomic write improvements (#2132)
Use `async with Lock` instead of deprecated `with await lock` usage. Forces a file fsync prior and a directory fsync (where available) after rename to prevent issues with left behind temp files. Also should clarify: this is not threadsafe. Comments were clarified, function names remain misleading.
This commit is contained in:
parent
404800c556
commit
84ac5f3952
@ -5,8 +5,8 @@ import asyncio
|
|||||||
import logging
|
import logging
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
# This is basically our old DataIO, except that it's now threadsafe
|
# This is basically our old DataIO and just a base for much more elaborate classes
|
||||||
# and just a base for much more elaborate classes
|
# This still isn't completely threadsafe, (do not use config in threads)
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
log = logging.getLogger("red")
|
log = logging.getLogger("red")
|
||||||
@ -27,18 +27,50 @@ class JsonIO:
|
|||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
# noinspection PyUnresolvedReferences
|
||||||
def _save_json(self, data, settings=PRETTY):
|
def _save_json(self, data, settings=PRETTY):
|
||||||
|
"""
|
||||||
|
This fsync stuff here is entirely neccessary.
|
||||||
|
|
||||||
|
On windows, it is not available in entirety.
|
||||||
|
If a windows user ends up with tons of temp files, they should consider hosting on
|
||||||
|
something POSIX compatible, or using the mongo backend instead.
|
||||||
|
|
||||||
|
Most users wont encounter this issue, but with high write volumes,
|
||||||
|
without the fsync on both the temp file, and after the replace on the directory,
|
||||||
|
There's no real durability or atomicity guarantee from the filesystem.
|
||||||
|
|
||||||
|
In depth overview of underlying reasons why this is needed:
|
||||||
|
https://lwn.net/Articles/457667/
|
||||||
|
|
||||||
|
Also see:
|
||||||
|
http://man7.org/linux/man-pages/man2/open.2.html#NOTES (synchronous I/O section)
|
||||||
|
And:
|
||||||
|
https://www.mjmwired.net/kernel/Documentation/filesystems/ext4.txt#310
|
||||||
|
"""
|
||||||
log.debug("Saving file {}".format(self.path))
|
log.debug("Saving file {}".format(self.path))
|
||||||
filename = self.path.stem
|
filename = self.path.stem
|
||||||
tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0])
|
tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0])
|
||||||
tmp_path = self.path.parent / tmp_file
|
tmp_path = self.path.parent / tmp_file
|
||||||
with tmp_path.open(encoding="utf-8", mode="w") as f:
|
with tmp_path.open(encoding="utf-8", mode="w") as f:
|
||||||
json.dump(data, f, **settings)
|
json.dump(data, f, **settings)
|
||||||
|
f.flush() # This does get closed on context exit, ...
|
||||||
|
os.fsync(f.fileno()) # but that needs to happen prior to this line
|
||||||
|
|
||||||
tmp_path.replace(self.path)
|
tmp_path.replace(self.path)
|
||||||
|
|
||||||
|
# pylint: disable=E1101
|
||||||
|
try:
|
||||||
|
fd = os.open(self.path.parent, os.O_DIRECTORY)
|
||||||
|
os.fsync(fd)
|
||||||
|
except AttributeError:
|
||||||
|
fd = None
|
||||||
|
finally:
|
||||||
|
if fd is not None:
|
||||||
|
os.close(fd)
|
||||||
|
|
||||||
async def _threadsafe_save_json(self, data, settings=PRETTY):
|
async def _threadsafe_save_json(self, data, settings=PRETTY):
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
func = functools.partial(self._save_json, data, settings)
|
func = functools.partial(self._save_json, data, settings)
|
||||||
with await self._lock:
|
async with self._lock:
|
||||||
await loop.run_in_executor(None, func)
|
await loop.run_in_executor(None, func)
|
||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
# noinspection PyUnresolvedReferences
|
||||||
@ -51,6 +83,5 @@ class JsonIO:
|
|||||||
async def _threadsafe_load_json(self, path):
|
async def _threadsafe_load_json(self, path):
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
func = functools.partial(self._load_json, path)
|
func = functools.partial(self._load_json, path)
|
||||||
task = loop.run_in_executor(None, func)
|
async with self._lock:
|
||||||
with await self._lock:
|
return await loop.run_in_executor(None, func)
|
||||||
return await asyncio.wait_for(task)
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user