From 84ac5f3952bbfa4c3f68aa1768be902e8aa21fb0 Mon Sep 17 00:00:00 2001 From: Michael H Date: Mon, 24 Sep 2018 04:29:14 -0400 Subject: [PATCH] JsonIO atomic write improvements (#2132) Use `async with Lock` instead of deprecated `with await lock` usage. Forces a file fsync prior and a directory fsync (where available) after rename to prevent issues with left behind temp files. Also should clarify: this is not threadsafe. Comments were clarified, function names remain misleading. --- redbot/core/json_io.py | 43 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/redbot/core/json_io.py b/redbot/core/json_io.py index af4489052..c610d7bb1 100644 --- a/redbot/core/json_io.py +++ b/redbot/core/json_io.py @@ -5,8 +5,8 @@ import asyncio import logging from uuid import uuid4 -# This is basically our old DataIO, except that it's now threadsafe -# and just a base for much more elaborate classes +# This is basically our old DataIO and just a base for much more elaborate classes +# This still isn't completely threadsafe, (do not use config in threads) from pathlib import Path log = logging.getLogger("red") @@ -27,18 +27,50 @@ class JsonIO: # noinspection PyUnresolvedReferences def _save_json(self, data, settings=PRETTY): + """ + This fsync stuff here is entirely neccessary. + + On windows, it is not available in entirety. + If a windows user ends up with tons of temp files, they should consider hosting on + something POSIX compatible, or using the mongo backend instead. + + Most users wont encounter this issue, but with high write volumes, + without the fsync on both the temp file, and after the replace on the directory, + There's no real durability or atomicity guarantee from the filesystem. + + In depth overview of underlying reasons why this is needed: + https://lwn.net/Articles/457667/ + + Also see: + http://man7.org/linux/man-pages/man2/open.2.html#NOTES (synchronous I/O section) + And: + https://www.mjmwired.net/kernel/Documentation/filesystems/ext4.txt#310 + """ log.debug("Saving file {}".format(self.path)) filename = self.path.stem tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0]) tmp_path = self.path.parent / tmp_file with tmp_path.open(encoding="utf-8", mode="w") as f: json.dump(data, f, **settings) + f.flush() # This does get closed on context exit, ... + os.fsync(f.fileno()) # but that needs to happen prior to this line + tmp_path.replace(self.path) + # pylint: disable=E1101 + try: + fd = os.open(self.path.parent, os.O_DIRECTORY) + os.fsync(fd) + except AttributeError: + fd = None + finally: + if fd is not None: + os.close(fd) + async def _threadsafe_save_json(self, data, settings=PRETTY): loop = asyncio.get_event_loop() func = functools.partial(self._save_json, data, settings) - with await self._lock: + async with self._lock: await loop.run_in_executor(None, func) # noinspection PyUnresolvedReferences @@ -51,6 +83,5 @@ class JsonIO: async def _threadsafe_load_json(self, path): loop = asyncio.get_event_loop() func = functools.partial(self._load_json, path) - task = loop.run_in_executor(None, func) - with await self._lock: - return await asyncio.wait_for(task) + async with self._lock: + return await loop.run_in_executor(None, func)