Kill JsonIO and fix JSON driver caching issues (#2796)
* Kill JsonIO and fix JSON driver caching issues
* Ensure lock covers critical region in set()
* Make tests pass
* Use pickle over deepcopy in Config
* Fix temp instance creation
* Serialise value before doing anything in set()

Signed-off-by: Toby Harradine <tobyharradine@gmail.com>
This commit is contained in:
parent f3bbfdc64d
commit bff7e214ab
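As a point of reference for the "Use pickle over deepcopy in Config" change, here is a minimal sketch (not part of the diff) of the copy technique being swapped in: for plain JSON-style data, a pickle round-trip yields an independent copy and is typically faster than copy.deepcopy.

    import copy
    import pickle

    data = {"prefix": ["!"], "nested": {"scores": [1, 2, 3]}}

    # Both produce a fully independent copy of plain dict/list/str/int data.
    via_deepcopy = copy.deepcopy(data)
    via_pickle = pickle.loads(pickle.dumps(data, -1))  # -1 selects the highest pickle protocol

    assert via_deepcopy == via_pickle == data
    assert via_pickle["nested"] is not data["nested"]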
.github/CODEOWNERS (vendored): 1 deletion
@@ -16,7 +16,6 @@ redbot/core/drivers/* @tekulvw
redbot/core/events.py @tekulvw
redbot/core/global_checks.py @tekulvw
redbot/core/i18n.py @tekulvw
-redbot/core/json_io.py @tekulvw
redbot/core/modlog.py @palmtree5
redbot/core/rpc.py @tekulvw
redbot/core/utils/chat_formatting.py @tekulvw
@@ -3,6 +3,7 @@
# Discord Version check

import asyncio
+import json
import logging
import os
import sys
@@ -12,7 +13,6 @@ import discord
import redbot.logging
from redbot.core.bot import Red, ExitCodes
from redbot.core.cog_manager import CogManagerUI
-from redbot.core.json_io import JsonIO
from redbot.core.global_checks import init_global_checks
from redbot.core.events import init_events
from redbot.core.cli import interactive_config, confirm, parse_cli_flags
@@ -61,7 +61,8 @@ def list_instances():
)
sys.exit(1)
else:
-data = JsonIO(data_manager.config_file)._load_json()
+with data_manager.config_file.open(encoding="utf-8") as fs:
+data = json.load(fs)
text = "Configured Instances:\n\n"
for instance_name in sorted(data.keys()):
text += "{}\n".format(instance_name)
@@ -1,8 +1,8 @@
-import logging
-import collections
-from copy import deepcopy
-from typing import Any, Union, Tuple, Dict, Awaitable, AsyncContextManager, TypeVar
+import logging
+import pickle
+import weakref
+from typing import Any, Union, Tuple, Dict, Awaitable, AsyncContextManager, TypeVar

import discord

@@ -57,7 +57,7 @@ class _ValueCtxManager(Awaitable[_T], AsyncContextManager[_T]):  # pylint: disab
"list or dict) in order to use a config value as "
"a context manager."
)
-self.__original_value = deepcopy(self.raw_value)
+self.__original_value = pickle.loads(pickle.dumps(self.raw_value, -1))
return self.raw_value

async def __aexit__(self, exc_type, exc, tb):
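For orientation, a brief usage sketch (hypothetical cog code, not part of the diff) of the context-manager path this hunk touches: the copy taken on entry, now produced by the pickle round-trip above, is what the exit handler compares against to decide whether the mutated value needs to be saved.

    # Hypothetical cog code; `conf` is a Config instance with a registered
    # guild default `scores = []`.
    async with conf.guild(guild).scores() as scores:
        scores.append(42)
    # On __aexit__ the list is compared with the copy taken on __aenter__ and
    # written back only if it changed.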
@@ -204,7 +204,7 @@ class Group(Value):

@property
def defaults(self):
-return deepcopy(self._defaults)
+return pickle.loads(pickle.dumps(self._defaults, -1))

async def _get(self, default: Dict[str, Any] = ...) -> Dict[str, Any]:
default = default if default is not ... else self.defaults
@@ -446,7 +446,7 @@ class Group(Value):
result = self.nested_update(value, defaults.get(key, {}))
defaults[key] = result
else:
-defaults[key] = deepcopy(current[key])
+defaults[key] = pickle.loads(pickle.dumps(current[key], -1))
return defaults

async def set(self, value):
@@ -558,7 +558,7 @@ class Config:

@property
def defaults(self):
-return deepcopy(self._defaults)
+return pickle.loads(pickle.dumps(self._defaults, -1))

@staticmethod
def _create_uuid(identifier: int):
@@ -727,7 +727,7 @@ class Config:
if key not in self._defaults:
self._defaults[key] = {}

-data = deepcopy(kwargs)
+data = pickle.loads(pickle.dumps(kwargs, -1))

for k, v in data.items():
to_add = self._get_defaults_dict(k, v)
@@ -988,7 +988,7 @@ class Config:
pass
else:
for k, v in dict_.items():
-data = deepcopy(defaults)
+data = pickle.loads(pickle.dumps(defaults, -1))
data.update(v)
ret[int(k)] = data

@@ -1066,7 +1066,7 @@ class Config:
ret = {}
defaults = self.defaults.get(self.MEMBER, {})
for member_id, member_data in guild_data.items():
-new_member_data = deepcopy(defaults)
+new_member_data = pickle.loads(pickle.dumps(defaults, -1))
new_member_data.update(member_data)
ret[int(member_id)] = new_member_data
return ret
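A small illustration (not from the diff) of why these hunks copy the defaults before merging stored data into them: dict.update() mutates its receiver, so without the copy the shared defaults would be silently overwritten.

    import pickle

    defaults = {"points": 0, "warnings": []}
    stored = {"points": 5}

    merged = pickle.loads(pickle.dumps(defaults, -1))  # independent copy of the defaults
    merged.update(stored)

    assert merged == {"points": 5, "warnings": []}
    assert defaults == {"points": 0, "warnings": []}  # shared defaults stay untouched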
@@ -1359,9 +1359,9 @@ class Core(commands.Cog, CoreLogic):
docs = await db[c_name].find().to_list(None)
for item in docs:
item_id = str(item.pop("_id"))
-output = item
target = JSON(c_name, item_id, data_path_override=c_data_path)
-await target.jsonIO._threadsafe_save_json(output)
+target.data = item
+await target._save()
backup_filename = "redv3-{}-{}.tar.gz".format(
instance_name, ctx.message.created_at.strftime("%Y-%m-%d %H-%M-%S")
)
@@ -1,16 +1,17 @@
import inspect
+import json
import logging
import os
import sys
import tempfile
from copy import deepcopy
from pathlib import Path
+from typing import Any, Dict

import appdirs
from discord.utils import deprecated

from . import commands
-from .json_io import JsonIO

__all__ = [
"create_temp_config",
@@ -25,12 +26,15 @@ __all__ = [

log = logging.getLogger("red.data_manager")

-jsonio = None
basic_config = None

instance_name = None

-basic_config_default = {"DATA_PATH": None, "COG_PATH_APPEND": "cogs", "CORE_PATH_APPEND": "core"}
+basic_config_default: Dict[str, Any] = {
+"DATA_PATH": None,
+"COG_PATH_APPEND": "cogs",
+"CORE_PATH_APPEND": "core",
+}

config_dir = None
appdir = appdirs.AppDirs("Red-DiscordBot")
@@ -57,9 +61,13 @@ def create_temp_config():
default_dirs["STORAGE_TYPE"] = "JSON"
default_dirs["STORAGE_DETAILS"] = {}

-config = JsonIO(config_file)._load_json()
+with config_file.open("r", encoding="utf-8") as fs:
+config = json.load(fs)

config[name] = default_dirs
-JsonIO(config_file)._save_json(config)

+with config_file.open("w", encoding="utf-8") as fs:
+json.dump(config, fs, indent=4)


def load_basic_configuration(instance_name_: str):
@@ -76,23 +84,21 @@ def load_basic_configuration(instance_name_: str):
The instance name given by CLI argument and created during
redbot setup.
"""
-global jsonio
global basic_config
global instance_name

-jsonio = JsonIO(config_file)

instance_name = instance_name_

try:
-config = jsonio._load_json()
-basic_config = config[instance_name]
+with config_file.open(encoding="utf-8") as fs:
+config = json.load(fs)
except (FileNotFoundError, KeyError):
print(
"You need to configure the bot instance using `redbot-setup`"
" prior to running the bot."
)
sys.exit(1)
+else:
+basic_config = config[instance_name]


def _base_data_path() -> Path:
@@ -1,9 +1,12 @@
-from pathlib import Path
-import copy
-import weakref
import asyncio
+import json
import logging

-from ..json_io import JsonIO
+import os
+import pickle
+import weakref
+from pathlib import Path
+from typing import Any, Dict
+from uuid import uuid4

from .red_base import BaseDriver, IdentifierData

@@ -59,13 +62,10 @@ class JSON(BaseDriver):
self.data_path = data_path_override
else:
self.data_path = Path.cwd() / "cogs" / ".data" / self.cog_name

self.data_path.mkdir(parents=True, exist_ok=True)

self.data_path = self.data_path / self.file_name

-self.jsonIO = JsonIO(self.data_path)

+self._lock = asyncio.Lock()
self._load_data()

async def has_valid_connection(self) -> bool:
@@ -90,10 +90,12 @@
return

try:
-self.data = self.jsonIO._load_json()
+with self.data_path.open("r", encoding="utf-8") as fs:
+self.data = json.load(fs)
except FileNotFoundError:
self.data = {}
-self.jsonIO._save_json(self.data)
+with self.data_path.open("w", encoding="utf-8") as fs:
+json.dump(self.data, fs, indent=4)

def migrate_identifier(self, raw_identifier: int):
if self.unique_cog_identifier in self.data:
@@ -104,7 +106,7 @@
if ident in self.data:
self.data[self.unique_cog_identifier] = self.data[ident]
del self.data[ident]
-self.jsonIO._save_json(self.data)
+_save_json(self.data_path, self.data)
break

async def get(self, identifier_data: IdentifierData):
@@ -112,18 +114,23 @@
full_identifiers = identifier_data.to_tuple()
for i in full_identifiers:
partial = partial[i]
-return copy.deepcopy(partial)
+return pickle.loads(pickle.dumps(partial, -1))

async def set(self, identifier_data: IdentifierData, value=None):
partial = self.data
full_identifiers = identifier_data.to_tuple()
-for i in full_identifiers[:-1]:
-if i not in partial:
-partial[i] = {}
-partial = partial[i]
+# This is both our deepcopy() and our way of making sure this value is actually JSON
+# serializable.
+value_copy = json.loads(json.dumps(value))

-partial[full_identifiers[-1]] = copy.deepcopy(value)
-await self.jsonIO._threadsafe_save_json(self.data)
+async with self._lock:
+for i in full_identifiers[:-1]:
+if i not in partial:
+partial[i] = {}
+partial = partial[i]
+partial[full_identifiers[-1]] = value_copy

+await self._save()

async def clear(self, identifier_data: IdentifierData):
partial = self.data
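A condensed sketch (simplified stand-in, not the actual driver code) of the pattern the new set() follows, per the commit message: serialise the value up front, which both validates it as JSON and doubles as the deep copy, then hold the lock across the traversal, the mutation, and the save so concurrent writers cannot interleave.

    import asyncio
    import json

    class TinyJSONStore:
        def __init__(self):
            self.data = {}
            self._lock = asyncio.Lock()

        async def set(self, keys, value):
            # Serialising first validates the value and gives us an independent copy.
            value_copy = json.loads(json.dumps(value))
            async with self._lock:
                partial = self.data
                for k in keys[:-1]:
                    partial = partial.setdefault(k, {})
                partial[keys[-1]] = value_copy
                await self._save()

        async def _save(self):
            # Placeholder: the real driver runs the blocking _save_json() in a thread,
            # e.g. await loop.run_in_executor(None, _save_json, path, self.data)
            pass

    # Example:
    # store = TinyJSONStore()
    # asyncio.run(store.set(("GLOBAL", "prefix"), ["!"]))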
@@ -131,35 +138,87 @@
try:
for i in full_identifiers[:-1]:
partial = partial[i]
-del partial[full_identifiers[-1]]
except KeyError:
pass
else:
-await self.jsonIO._threadsafe_save_json(self.data)
+async with self._lock:
+try:
+del partial[full_identifiers[-1]]
+except KeyError:
+pass
+else:
+await self._save()

async def import_data(self, cog_data, custom_group_data):
-def update_write_data(identifier_data: IdentifierData, data):
+def update_write_data(identifier_data: IdentifierData, _data):
partial = self.data
idents = identifier_data.to_tuple()
for ident in idents[:-1]:
if ident not in partial:
partial[ident] = {}
partial = partial[ident]
-partial[idents[-1]] = data
+partial[idents[-1]] = _data

-for category, all_data in cog_data:
-splitted_pkey = self._split_primary_key(category, custom_group_data, all_data)
-for pkey, data in splitted_pkey:
-ident_data = IdentifierData(
-self.unique_cog_identifier,
-category,
-pkey,
-(),
-custom_group_data,
-is_custom=category in custom_group_data,
-)
-update_write_data(ident_data, data)
-await self.jsonIO._threadsafe_save_json(self.data)
+async with self._lock:
+for category, all_data in cog_data:
+splitted_pkey = self._split_primary_key(category, custom_group_data, all_data)
+for pkey, data in splitted_pkey:
+ident_data = IdentifierData(
+self.unique_cog_identifier,
+category,
+pkey,
+(),
+custom_group_data,
+is_custom=category in custom_group_data,
+)
+update_write_data(ident_data, data)
+await self._save()

+async def _save(self) -> None:
+loop = asyncio.get_running_loop()
+await loop.run_in_executor(None, _save_json, self.data_path, self.data)

def get_config_details(self):
return


+def _save_json(path: Path, data: Dict[str, Any]) -> None:
+"""
+This fsync stuff here is entirely neccessary.

+On windows, it is not available in entirety.
+If a windows user ends up with tons of temp files, they should consider hosting on
+something POSIX compatible, or using the mongo backend instead.

+Most users wont encounter this issue, but with high write volumes,
+without the fsync on both the temp file, and after the replace on the directory,
+There's no real durability or atomicity guarantee from the filesystem.

+In depth overview of underlying reasons why this is needed:
+https://lwn.net/Articles/457667/

+Also see:
+http://man7.org/linux/man-pages/man2/open.2.html#NOTES (synchronous I/O section)
+And:
+https://www.mjmwired.net/kernel/Documentation/filesystems/ext4.txt#310
+"""
+filename = path.stem
+tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0])
+tmp_path = path.parent / tmp_file
+with tmp_path.open(encoding="utf-8", mode="w") as fs:
+json.dump(data, fs, indent=4)
+fs.flush()  # This does get closed on context exit, ...
+os.fsync(fs.fileno())  # but that needs to happen prior to this line

+tmp_path.replace(path)

+try:
+flag = os.O_DIRECTORY  # pylint: disable=no-member
+except AttributeError:
+pass
+else:
+fd = os.open(path.parent, flag)
+try:
+os.fsync(fd)
+finally:
+os.close(fd)
@@ -1,90 +0,0 @@
-import functools
-import json
-import os
-import asyncio
-import logging
-from copy import deepcopy
-from uuid import uuid4

-# This is basically our old DataIO and just a base for much more elaborate classes
-# This still isn't completely threadsafe, (do not use config in threads)
-from pathlib import Path

-log = logging.getLogger("red")

-PRETTY = {"indent": 4, "sort_keys": False, "separators": (",", " : ")}
-MINIFIED = {"sort_keys": False, "separators": (",", ":")}


-class JsonIO:
-"""Basic functions for atomic saving / loading of json files"""

-def __init__(self, path: Path = Path.cwd()):
-"""
-:param path: Full path to file.
-"""
-self._lock = asyncio.Lock()
-self.path = path

-# noinspection PyUnresolvedReferences
-def _save_json(self, data, settings=PRETTY):
-"""
-This fsync stuff here is entirely neccessary.

-On windows, it is not available in entirety.
-If a windows user ends up with tons of temp files, they should consider hosting on
-something POSIX compatible, or using the mongo backend instead.

-Most users wont encounter this issue, but with high write volumes,
-without the fsync on both the temp file, and after the replace on the directory,
-There's no real durability or atomicity guarantee from the filesystem.

-In depth overview of underlying reasons why this is needed:
-https://lwn.net/Articles/457667/

-Also see:
-http://man7.org/linux/man-pages/man2/open.2.html#NOTES (synchronous I/O section)
-And:
-https://www.mjmwired.net/kernel/Documentation/filesystems/ext4.txt#310
-"""
-filename = self.path.stem
-tmp_file = "{}-{}.tmp".format(filename, uuid4().fields[0])
-tmp_path = self.path.parent / tmp_file
-with tmp_path.open(encoding="utf-8", mode="w") as f:
-json.dump(data, f, **settings)
-f.flush()  # This does get closed on context exit, ...
-os.fsync(f.fileno())  # but that needs to happen prior to this line

-tmp_path.replace(self.path)

-# pylint: disable=no-member
-try:
-fd = os.open(self.path.parent, os.O_DIRECTORY)
-os.fsync(fd)
-except AttributeError:
-fd = None
-finally:
-if fd is not None:
-os.close(fd)

-async def _threadsafe_save_json(self, data, settings=PRETTY):
-loop = asyncio.get_event_loop()
-# the deepcopy is needed here. otherwise,
-# the dict can change during serialization
-# and this will break the encoder.
-data_copy = deepcopy(data)
-func = functools.partial(self._save_json, data_copy, settings)
-async with self._lock:
-await loop.run_in_executor(None, func)

-# noinspection PyUnresolvedReferences
-def _load_json(self):
-with self.path.open(encoding="utf-8", mode="r") as f:
-data = json.load(f)
-return data

-async def _threadsafe_load_json(self, path):
-loop = asyncio.get_event_loop()
-func = functools.partial(self._load_json, path)
-async with self._lock:
-return await loop.run_in_executor(None, func)
@@ -8,7 +8,6 @@ __all__ = ["cleanup_datamanager", "data_mgr_config", "cog_instance"]
@pytest.fixture(autouse=True)
def cleanup_datamanager():
data_manager.basic_config = None
-data_manager.jsonio = None


@pytest.fixture()
@@ -23,7 +23,6 @@ from redbot.core.data_manager import (
core_data_path,
storage_details,
)
-from redbot.core.json_io import JsonIO
from redbot.core.utils import safe_delete
from redbot.core import Config
from redbot.core.drivers import BackendType, IdentifierData
@@ -50,7 +49,8 @@ def load_existing_config():
if not config_file.exists():
return {}

-return JsonIO(config_file)._load_json()
+with config_file.open(encoding="utf-8") as fs:
+return json.load(fs)


instance_data = load_existing_config()
@@ -74,7 +74,9 @@ def save_config(name, data, remove=False):
print("Not continuing")
sys.exit(0)
config[name] = data
-JsonIO(config_file)._save_json(config)

+with config_file.open("w", encoding="utf-8") as fs:
+json.dump(config, fs, indent=4)


def get_data_dir():
@@ -220,7 +222,7 @@ async def json_to_mongov2(instance):
if "." in cog_name:
# Garbage handler
continue
-with p.open(mode="r") as f:
+with p.open(mode="r", encoding="utf-8") as f:
cog_data = json.load(f)
for identifier, all_data in cog_data.items():
try: