diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index f1dc5b0cd..ef2d440ba 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -16,7 +16,6 @@ redbot/core/drivers/* @tekulvw
 redbot/core/events.py @tekulvw
 redbot/core/global_checks.py @tekulvw
 redbot/core/i18n.py @tekulvw
-redbot/core/json_io.py @tekulvw
 redbot/core/modlog.py @palmtree5
 redbot/core/rpc.py @tekulvw
 redbot/core/utils/chat_formatting.py @tekulvw
diff --git a/redbot/__main__.py b/redbot/__main__.py
index e64de887a..602a77489 100644
--- a/redbot/__main__.py
+++ b/redbot/__main__.py
@@ -3,6 +3,7 @@
 # Discord Version check
 
 import asyncio
+import json
 import logging
 import os
 import sys
@@ -12,7 +13,6 @@ import discord
 import redbot.logging
 from redbot.core.bot import Red, ExitCodes
 from redbot.core.cog_manager import CogManagerUI
-from redbot.core.json_io import JsonIO
 from redbot.core.global_checks import init_global_checks
 from redbot.core.events import init_events
 from redbot.core.cli import interactive_config, confirm, parse_cli_flags
@@ -61,7 +61,8 @@ def list_instances():
         )
         sys.exit(1)
     else:
-        data = JsonIO(data_manager.config_file)._load_json()
+        with data_manager.config_file.open(encoding="utf-8") as fs:
+            data = json.load(fs)
         text = "Configured Instances:\n\n"
         for instance_name in sorted(data.keys()):
             text += "{}\n".format(instance_name)
diff --git a/redbot/core/config.py b/redbot/core/config.py
index 569e33b70..00d9305a1 100644
--- a/redbot/core/config.py
+++ b/redbot/core/config.py
@@ -1,8 +1,8 @@
-import logging
 import collections
-from copy import deepcopy
-from typing import Any, Union, Tuple, Dict, Awaitable, AsyncContextManager, TypeVar
+import logging
+import pickle
 import weakref
+from typing import Any, Union, Tuple, Dict, Awaitable, AsyncContextManager, TypeVar
 
 import discord
 
@@ -57,7 +57,7 @@ class _ValueCtxManager(Awaitable[_T], AsyncContextManager[_T]):  # pylint: disab
                 "list or dict) in order to use a config value as "
                 "a context manager."
             )
-        self.__original_value = deepcopy(self.raw_value)
+        self.__original_value = pickle.loads(pickle.dumps(self.raw_value, -1))
         return self.raw_value
 
     async def __aexit__(self, exc_type, exc, tb):
@@ -204,7 +204,7 @@ class Group(Value):
 
     @property
     def defaults(self):
-        return deepcopy(self._defaults)
+        return pickle.loads(pickle.dumps(self._defaults, -1))
 
     async def _get(self, default: Dict[str, Any] = ...) -> Dict[str, Any]:
         default = default if default is not ... else self.defaults
@@ -446,7 +446,7 @@ class Group(Value):
                 result = self.nested_update(value, defaults.get(key, {}))
                 defaults[key] = result
             else:
-                defaults[key] = deepcopy(current[key])
+                defaults[key] = pickle.loads(pickle.dumps(current[key], -1))
         return defaults
 
     async def set(self, value):
@@ -558,7 +558,7 @@ class Config:
 
     @property
     def defaults(self):
-        return deepcopy(self._defaults)
+        return pickle.loads(pickle.dumps(self._defaults, -1))
 
     @staticmethod
     def _create_uuid(identifier: int):
@@ -727,7 +727,7 @@ class Config:
         if key not in self._defaults:
             self._defaults[key] = {}
 
-        data = deepcopy(kwargs)
+        data = pickle.loads(pickle.dumps(kwargs, -1))
 
         for k, v in data.items():
             to_add = self._get_defaults_dict(k, v)
@@ -988,7 +988,7 @@ class Config:
             pass
         else:
             for k, v in dict_.items():
-                data = deepcopy(defaults)
+                data = pickle.loads(pickle.dumps(defaults, -1))
                 data.update(v)
                 ret[int(k)] = data
 
@@ -1066,7 +1066,7 @@ class Config:
         ret = {}
         defaults = self.defaults.get(self.MEMBER, {})
         for member_id, member_data in guild_data.items():
-            new_member_data = deepcopy(defaults)
+            new_member_data = pickle.loads(pickle.dumps(defaults, -1))
             new_member_data.update(member_data)
             ret[int(member_id)] = new_member_data
         return ret
diff --git a/redbot/core/core_commands.py b/redbot/core/core_commands.py
index 3b4680864..7112a04ea 100644
--- a/redbot/core/core_commands.py
+++ b/redbot/core/core_commands.py
@@ -1359,9 +1359,9 @@ class Core(commands.Cog, CoreLogic):
                 docs = await db[c_name].find().to_list(None)
                 for item in docs:
                     item_id = str(item.pop("_id"))
-                    output = item
                     target = JSON(c_name, item_id, data_path_override=c_data_path)
-                    await target.jsonIO._threadsafe_save_json(output)
+                    target.data = item
+                    await target._save()
         backup_filename = "redv3-{}-{}.tar.gz".format(
             instance_name, ctx.message.created_at.strftime("%Y-%m-%d %H-%M-%S")
         )
diff --git a/redbot/core/data_manager.py b/redbot/core/data_manager.py
index 0e0b58fdb..b031cd1cb 100644
--- a/redbot/core/data_manager.py
+++ b/redbot/core/data_manager.py
@@ -1,16 +1,17 @@
 import inspect
+import json
 import logging
 import os
 import sys
 import tempfile
 from copy import deepcopy
 from pathlib import Path
+from typing import Any, Dict
 
 import appdirs
 from discord.utils import deprecated
 
 from . import commands
-from .json_io import JsonIO
 
 __all__ = [
     "create_temp_config",
@@ -25,12 +26,15 @@ __all__ = [
 
 log = logging.getLogger("red.data_manager")
 
-jsonio = None
 basic_config = None
 
 instance_name = None
 
-basic_config_default = {"DATA_PATH": None, "COG_PATH_APPEND": "cogs", "CORE_PATH_APPEND": "core"}
+basic_config_default: Dict[str, Any] = {
+    "DATA_PATH": None,
+    "COG_PATH_APPEND": "cogs",
+    "CORE_PATH_APPEND": "core",
+}
 
 config_dir = None
 appdir = appdirs.AppDirs("Red-DiscordBot")
@@ -57,9 +61,13 @@ def create_temp_config():
     default_dirs["STORAGE_TYPE"] = "JSON"
     default_dirs["STORAGE_DETAILS"] = {}
 
-    config = JsonIO(config_file)._load_json()
+    with config_file.open("r", encoding="utf-8") as fs:
+        config = json.load(fs)
+
     config[name] = default_dirs
-    JsonIO(config_file)._save_json(config)
+
+    with config_file.open("w", encoding="utf-8") as fs:
+        json.dump(config, fs, indent=4)
 
 
 def load_basic_configuration(instance_name_: str):
@@ -76,23 +84,21 @@ def load_basic_configuration(instance_name_: str):
     instance_name_ : str
         The instance name given by CLI argument and created during redbot
         setup.
""" - global jsonio global basic_config global instance_name - - jsonio = JsonIO(config_file) - instance_name = instance_name_ try: - config = jsonio._load_json() - basic_config = config[instance_name] + with config_file.open(encoding="utf-8") as fs: + config = json.load(fs) except (FileNotFoundError, KeyError): print( "You need to configure the bot instance using `redbot-setup`" " prior to running the bot." ) sys.exit(1) + else: + basic_config = config[instance_name] def _base_data_path() -> Path: diff --git a/redbot/core/drivers/red_json.py b/redbot/core/drivers/red_json.py index 7e6f7c333..f598f6cf4 100644 --- a/redbot/core/drivers/red_json.py +++ b/redbot/core/drivers/red_json.py @@ -1,9 +1,12 @@ -from pathlib import Path -import copy -import weakref +import asyncio +import json import logging - -from ..json_io import JsonIO +import os +import pickle +import weakref +from pathlib import Path +from typing import Any, Dict +from uuid import uuid4 from .red_base import BaseDriver, IdentifierData @@ -59,13 +62,10 @@ class JSON(BaseDriver): self.data_path = data_path_override else: self.data_path = Path.cwd() / "cogs" / ".data" / self.cog_name - self.data_path.mkdir(parents=True, exist_ok=True) - self.data_path = self.data_path / self.file_name - self.jsonIO = JsonIO(self.data_path) - + self._lock = asyncio.Lock() self._load_data() async def has_valid_connection(self) -> bool: @@ -90,10 +90,12 @@ class JSON(BaseDriver): return try: - self.data = self.jsonIO._load_json() + with self.data_path.open("r", encoding="utf-8") as fs: + self.data = json.load(fs) except FileNotFoundError: self.data = {} - self.jsonIO._save_json(self.data) + with self.data_path.open("w", encoding="utf-8") as fs: + json.dump(self.data, fs, indent=4) def migrate_identifier(self, raw_identifier: int): if self.unique_cog_identifier in self.data: @@ -104,7 +106,7 @@ class JSON(BaseDriver): if ident in self.data: self.data[self.unique_cog_identifier] = self.data[ident] del self.data[ident] - self.jsonIO._save_json(self.data) + _save_json(self.data_path, self.data) break async def get(self, identifier_data: IdentifierData): @@ -112,18 +114,23 @@ class JSON(BaseDriver): full_identifiers = identifier_data.to_tuple() for i in full_identifiers: partial = partial[i] - return copy.deepcopy(partial) + return pickle.loads(pickle.dumps(partial, -1)) async def set(self, identifier_data: IdentifierData, value=None): partial = self.data full_identifiers = identifier_data.to_tuple() - for i in full_identifiers[:-1]: - if i not in partial: - partial[i] = {} - partial = partial[i] + # This is both our deepcopy() and our way of making sure this value is actually JSON + # serializable. 
+        value_copy = json.loads(json.dumps(value))
 
-        partial[full_identifiers[-1]] = copy.deepcopy(value)
-        await self.jsonIO._threadsafe_save_json(self.data)
+        async with self._lock:
+            for i in full_identifiers[:-1]:
+                if i not in partial:
+                    partial[i] = {}
+                partial = partial[i]
+            partial[full_identifiers[-1]] = value_copy
+
+            await self._save()
 
     async def clear(self, identifier_data: IdentifierData):
         partial = self.data
@@ -131,35 +138,87 @@ class JSON(BaseDriver):
         full_identifiers = identifier_data.to_tuple()
         try:
             for i in full_identifiers[:-1]:
                 partial = partial[i]
-            del partial[full_identifiers[-1]]
         except KeyError:
             pass
         else:
-            await self.jsonIO._threadsafe_save_json(self.data)
+            async with self._lock:
+                try:
+                    del partial[full_identifiers[-1]]
+                except KeyError:
+                    pass
+                else:
+                    await self._save()
 
     async def import_data(self, cog_data, custom_group_data):
-        def update_write_data(identifier_data: IdentifierData, data):
+        def update_write_data(identifier_data: IdentifierData, _data):
             partial = self.data
             idents = identifier_data.to_tuple()
             for ident in idents[:-1]:
                 if ident not in partial:
                     partial[ident] = {}
                 partial = partial[ident]
-            partial[idents[-1]] = data
+            partial[idents[-1]] = _data
 
-        for category, all_data in cog_data:
-            splitted_pkey = self._split_primary_key(category, custom_group_data, all_data)
-            for pkey, data in splitted_pkey:
-                ident_data = IdentifierData(
-                    self.unique_cog_identifier,
-                    category,
-                    pkey,
-                    (),
-                    custom_group_data,
-                    is_custom=category in custom_group_data,
-                )
-                update_write_data(ident_data, data)
-        await self.jsonIO._threadsafe_save_json(self.data)
+        async with self._lock:
+            for category, all_data in cog_data:
+                splitted_pkey = self._split_primary_key(category, custom_group_data, all_data)
+                for pkey, data in splitted_pkey:
+                    ident_data = IdentifierData(
+                        self.unique_cog_identifier,
+                        category,
+                        pkey,
+                        (),
+                        custom_group_data,
+                        is_custom=category in custom_group_data,
+                    )
+                    update_write_data(ident_data, data)
+            await self._save()
+
+    async def _save(self) -> None:
+        loop = asyncio.get_running_loop()
+        await loop.run_in_executor(None, _save_json, self.data_path, self.data)
 
     def get_config_details(self):
         return
+
+
+def _save_json(path: Path, data: Dict[str, Any]) -> None:
+    """
+    This fsync stuff here is entirely necessary.
+
+    On Windows, it is not available in entirety.
+    If a Windows user ends up with tons of temp files, they should consider hosting on
+    something POSIX compatible, or using the mongo backend instead.
+
+    Most users won't encounter this issue, but with high write volumes,
+    without the fsync on both the temp file, and after the replace on the directory,
+    there's no real durability or atomicity guarantee from the filesystem.
+
+    In depth overview of underlying reasons why this is needed:
+    https://lwn.net/Articles/457667/
+
+    Also see:
+    http://man7.org/linux/man-pages/man2/open.2.html#NOTES (synchronous I/O section)
+    And:
+    https://www.mjmwired.net/kernel/Documentation/filesystems/ext4.txt#310
+    """
+    filename = path.stem
+    tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0])
+    tmp_path = path.parent / tmp_file
+    with tmp_path.open(encoding="utf-8", mode="w") as fs:
+        json.dump(data, fs, indent=4)
+        fs.flush()  # This does get closed on context exit, ...
+        os.fsync(fs.fileno())  # but that needs to happen prior to this line
+
+    tmp_path.replace(path)
+
+    try:
+        flag = os.O_DIRECTORY  # pylint: disable=no-member
+    except AttributeError:
+        pass
+    else:
+        fd = os.open(path.parent, flag)
+        try:
+            os.fsync(fd)
+        finally:
+            os.close(fd)
diff --git a/redbot/core/json_io.py b/redbot/core/json_io.py
deleted file mode 100644
index d209a1ad9..000000000
--- a/redbot/core/json_io.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import functools
-import json
-import os
-import asyncio
-import logging
-from copy import deepcopy
-from uuid import uuid4
-
-# This is basically our old DataIO and just a base for much more elaborate classes
-# This still isn't completely threadsafe, (do not use config in threads)
-from pathlib import Path
-
-log = logging.getLogger("red")
-
-PRETTY = {"indent": 4, "sort_keys": False, "separators": (",", " : ")}
-MINIFIED = {"sort_keys": False, "separators": (",", ":")}
-
-
-class JsonIO:
-    """Basic functions for atomic saving / loading of json files"""
-
-    def __init__(self, path: Path = Path.cwd()):
-        """
-        :param path: Full path to file.
-        """
-        self._lock = asyncio.Lock()
-        self.path = path
-
-    # noinspection PyUnresolvedReferences
-    def _save_json(self, data, settings=PRETTY):
-        """
-        This fsync stuff here is entirely neccessary.
-
-        On windows, it is not available in entirety.
-        If a windows user ends up with tons of temp files, they should consider hosting on
-        something POSIX compatible, or using the mongo backend instead.
-
-        Most users wont encounter this issue, but with high write volumes,
-        without the fsync on both the temp file, and after the replace on the directory,
-        There's no real durability or atomicity guarantee from the filesystem.
-
-        In depth overview of underlying reasons why this is needed:
-        https://lwn.net/Articles/457667/
-
-        Also see:
-        http://man7.org/linux/man-pages/man2/open.2.html#NOTES (synchronous I/O section)
-        And:
-        https://www.mjmwired.net/kernel/Documentation/filesystems/ext4.txt#310
-        """
-        filename = self.path.stem
-        tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0])
-        tmp_path = self.path.parent / tmp_file
-        with tmp_path.open(encoding="utf-8", mode="w") as f:
-            json.dump(data, f, **settings)
-            f.flush()  # This does get closed on context exit, ...
-            os.fsync(f.fileno())  # but that needs to happen prior to this line
-
-        tmp_path.replace(self.path)
-
-        # pylint: disable=no-member
-        try:
-            fd = os.open(self.path.parent, os.O_DIRECTORY)
-            os.fsync(fd)
-        except AttributeError:
-            fd = None
-        finally:
-            if fd is not None:
-                os.close(fd)
-
-    async def _threadsafe_save_json(self, data, settings=PRETTY):
-        loop = asyncio.get_event_loop()
-        # the deepcopy is needed here. otherwise,
-        # the dict can change during serialization
-        # and this will break the encoder.
-        data_copy = deepcopy(data)
-        func = functools.partial(self._save_json, data_copy, settings)
-        async with self._lock:
-            await loop.run_in_executor(None, func)
-
-    # noinspection PyUnresolvedReferences
-    def _load_json(self):
-        with self.path.open(encoding="utf-8", mode="r") as f:
-            data = json.load(f)
-        return data
-
-    async def _threadsafe_load_json(self, path):
-        loop = asyncio.get_event_loop()
-        func = functools.partial(self._load_json, path)
-        async with self._lock:
-            return await loop.run_in_executor(None, func)
diff --git a/redbot/pytest/data_manager.py b/redbot/pytest/data_manager.py
index d1a68c06b..5a069674f 100644
--- a/redbot/pytest/data_manager.py
+++ b/redbot/pytest/data_manager.py
@@ -8,7 +8,6 @@ __all__ = ["cleanup_datamanager", "data_mgr_config", "cog_instance"]
 @pytest.fixture(autouse=True)
 def cleanup_datamanager():
     data_manager.basic_config = None
-    data_manager.jsonio = None
 
 
 @pytest.fixture()
diff --git a/redbot/setup.py b/redbot/setup.py
index 61a3b834d..20e2c848f 100644
--- a/redbot/setup.py
+++ b/redbot/setup.py
@@ -23,7 +23,6 @@ from redbot.core.data_manager import (
     core_data_path,
     storage_details,
 )
-from redbot.core.json_io import JsonIO
 from redbot.core.utils import safe_delete
 from redbot.core import Config
 from redbot.core.drivers import BackendType, IdentifierData
@@ -50,7 +49,8 @@ def load_existing_config():
     if not config_file.exists():
         return {}
 
-    return JsonIO(config_file)._load_json()
+    with config_file.open(encoding="utf-8") as fs:
+        return json.load(fs)
 
 
 instance_data = load_existing_config()
@@ -74,7 +74,9 @@ def save_config(name, data, remove=False):
             print("Not continuing")
             sys.exit(0)
     config[name] = data
-    JsonIO(config_file)._save_json(config)
+
+    with config_file.open("w", encoding="utf-8") as fs:
+        json.dump(config, fs, indent=4)
 
 
 def get_data_dir():
@@ -220,7 +222,7 @@ async def json_to_mongov2(instance):
         if "." in cog_name:  # Garbage handler
             continue
 
-        with p.open(mode="r") as f:
+        with p.open(mode="r", encoding="utf-8") as f:
             cog_data = json.load(f)
             for identifier, all_data in cog_data.items():
                 try:
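
Note on the `pickle.loads(pickle.dumps(obj, -1))` pattern that replaces `copy.deepcopy()` throughout this patch: protocol `-1` selects the highest available pickle protocol, and for the plain dict/list/str/int trees that Config stores, the round-trip is typically several times faster than `deepcopy()`. The sketch below is illustrative only, not part of the patch, and assumes nothing beyond the standard library; exact timings will vary by machine.

import copy
import pickle
import timeit

# A JSON-like settings tree similar to what the Config driver stores.
data = {"member": {str(i): {"xp": i, "roles": [1, 2, 3]} for i in range(1000)}}

def pickle_copy(obj):
    # Round-trip through the highest pickle protocol. Much faster than
    # copy.deepcopy() on plain data, but it raises (TypeError or
    # pickle.PicklingError) on values that cannot be pickled, e.g. open
    # files or lambdas -- acceptable here, since config data must be
    # JSON-serializable anyway.
    return pickle.loads(pickle.dumps(obj, -1))

assert pickle_copy(data) == copy.deepcopy(data)
print("deepcopy   :", timeit.timeit(lambda: copy.deepcopy(data), number=50))
print("pickle trip:", timeit.timeit(lambda: pickle_copy(data), number=50))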
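Similarly, the `json.loads(json.dumps(value))` round-trip added in `JSON.set()` does double duty, as the in-line comment says: it deep-copies the caller's value and rejects anything that could never be written to the settings file. A small standalone example of that behavior (again illustrative, not from the patch):

import json

def checked_copy(value):
    # Copies and validates in one step: json.dumps() raises TypeError for
    # non-serializable data (sets, bytes, custom objects), so bad values
    # fail loudly at set() time instead of corrupting the file at save
    # time. It also normalizes tuples to lists and int keys to str --
    # exactly what a reload from disk would produce.
    return json.loads(json.dumps(value))

print(checked_copy({"prefix": ["!"], "mod_role": None}))
try:
    checked_copy({"bad": {1, 2, 3}})  # sets are not JSON-serializable
except TypeError as exc:
    print("rejected:", exc)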
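Finally, the new `_save()`/`_save_json()` split keeps the event loop responsive: mutations happen under an `asyncio.Lock`, while the blocking atomic write (temp file in the same directory, flush, fsync, rename over the target) runs in the default thread pool. A condensed sketch of that shape follows; `TinyDriver` is a hypothetical stand-in, not the class from this patch, and the directory fsync is omitted for brevity (see `_save_json` above for the full recipe).

import asyncio
import json
import os
from pathlib import Path
from uuid import uuid4

class TinyDriver:
    # Hypothetical miniature of the JSON driver's save path.
    def __init__(self, path: Path):
        self.path = path
        self.data = {}
        self._lock = asyncio.Lock()

    async def set(self, key, value):
        value_copy = json.loads(json.dumps(value))  # copy + validate
        async with self._lock:  # one writer at a time
            self.data[key] = value_copy
            loop = asyncio.get_running_loop()
            # Blocking disk I/O goes to the default executor so the bot's
            # event loop never stalls on fsync.
            await loop.run_in_executor(None, self._atomic_write)

    def _atomic_write(self):
        # Temp file lives in the *same* directory so the final rename
        # cannot cross filesystems.
        tmp = self.path.parent / "{}-{}.tmp".format(self.path.stem, uuid4().fields[0])
        with tmp.open("w", encoding="utf-8") as fs:
            json.dump(self.data, fs, indent=4)
            fs.flush()
            os.fsync(fs.fileno())  # temp file contents reach the disk...
        tmp.replace(self.path)  # ...before the atomic rename publishes them

asyncio.run(TinyDriver(Path("settings.json")).set("prefix", ["!"]))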