When I moved to ZFS I found that beets became unusably slow, to the point where scrubbing tags in a single FLAC was taking 10 or more minutes. I opened an issue, https://github.com/beetbox/beets/issues/3642, to investigate and eventually tracked it down to this portion of mutagen code:
--- modulename: flac, funcname: _writeblock
flac.py(124): data = bytearray()
flac.py(125): code = (block.code | 128) if is_last else block.code
flac.py(126): datum = block.write()
--- modulename: flac, funcname: write
flac.py(653): try:
flac.py(654): return b"\x00" * self.length
flac.py(127): size = len(datum)
flac.py(128): if size > cls._MAX_SIZE:
flac.py(138): assert not size > cls._MAX_SIZE
flac.py(139): length = struct.pack(">I", size)[-3:]
flac.py(140): data.append(code)
flac.py(141): data += length
flac.py(142): data += datum
flac.py(143): return data
flac.py(168): return data
flac.py(868): data_size = len(data)
flac.py(870): resize_bytes(filething.fileobj, available, data_size, header)
--- modulename: _util, funcname: resize_bytes
_util.py(909): if new_size < old_size:
_util.py(910): delete_size = old_size - new_size
_util.py(911): delete_at = offset + new_size
_util.py(912): delete_bytes(fobj, delete_size, delete_at)
--- modulename: _util, funcname: delete_bytes
_util.py(875): if size < 0 or offset < 0:
_util.py(878): fobj.seek(0, 2)
_util.py(879): filesize = fobj.tell()
_util.py(880): movesize = filesize - offset - size
_util.py(882): if movesize < 0:
_util.py(885): if mmap is not None:
_util.py(886): try:
_util.py(887): mmap_move(fobj, offset, offset + size, movesize)
--- modulename: _util, funcname: mmap_move
_util.py(704): assert mmap is not None, "no mmap support"
_util.py(706): if dest < 0 or src < 0 or count < 0:
_util.py(709): try:
_util.py(710): fileno = fileobj.fileno()
_util.py(715): fileobj.seek(0, 2)
_util.py(716): filesize = fileobj.tell()
_util.py(717): length = max(dest, src) + count
_util.py(719): if length > filesize:
_util.py(722): offset = ((min(dest, src) // mmap.ALLOCATIONGRANULARITY) *
_util.py(723): mmap.ALLOCATIONGRANULARITY)
_util.py(722): offset = ((min(dest, src) // mmap.ALLOCATIONGRANULARITY) *
_util.py(724): assert dest >= offset
_util.py(725): assert src >= offset
_util.py(726): assert offset % mmap.ALLOCATIONGRANULARITY == 0
_util.py(729): if count == 0:
_util.py(733): if src == dest:
_util.py(736): fileobj.flush()
_util.py(737): file_map = mmap.mmap(fileno, length - offset, offset=offset)
_util.py(738): try:
_util.py(739): file_map.move(dest - offset, src - offset, count)
On the OpenZFS IRC someone mentioned the issue seems to be the mmap based file writing mutagen is doing. ZFS isn't integrated with the linux cache, and so each mmap operation needs to be copied in and out of the cache making it painfully slow. To test this I applied the following patch to mutagen:
diff --git a/mutagen/_util.py b/mutagen/_util.py
index 1332f9d..5b9a8cd 100644
--- a/mutagen/_util.py
+++ b/mutagen/_util.py
@@ -20,7 +20,7 @@ import decimal
from io import BytesIO
try:
- import mmap
+ mmap = None
except ImportError:
# Google App Engine has no mmap:
# https://github.com/quodlibet/mutagen/issues/286
@@ -701,8 +701,6 @@ def mmap_move(fileobj, dest, src, count):
ValueError: In case invalid parameters were given
"""
- assert mmap is not None, "no mmap support"
-
if dest < 0 or src < 0 or count < 0:
raise ValueError("Invalid parameters")
diff --git a/tests/test__util.py b/tests/test__util.py
index 0ed25ed..55d0d7a 100644
--- a/tests/test__util.py
+++ b/tests/test__util.py
@@ -2,7 +2,7 @@
from mutagen._util import DictMixin, cdata, insert_bytes, delete_bytes, \
decode_terminated, dict_match, enum, get_size, BitReader, BitReaderError, \
- resize_bytes, seek_end, mmap_move, verify_fileobj, fileobj_name, \
+ resize_bytes, seek_end, verify_fileobj, fileobj_name, \
read_full, flags, resize_file, fallback_move, encode_endian, loadfile, \
intround, verify_filename
from mutagen._compat import text_type, itervalues, iterkeys, iteritems, PY2, \
@@ -376,33 +376,12 @@ class TMoveMixin(object):
self.MOVE(o, 0, 1, 2)
self.MOVE(o, 1, 0, 2)
- def test_larger_than_page_size(self):
- off = mmap.ALLOCATIONGRANULARITY
- with self.file(b"f" * off * 2) as o:
- self.MOVE(o, off, off + 1, off - 1)
- self.MOVE(o, off + 1, off, off - 1)
-
- with self.file(b"f" * off * 2 + b"x") as o:
- self.MOVE(o, off * 2 - 1, off * 2, 1)
- self.assertEqual(self.read(o)[-3:], b"fxx")
-
class Tfallback_move(TestCase, TMoveMixin):
MOVE = staticmethod(fallback_move)
-class MmapMove(TestCase, TMoveMixin):
-
- MOVE = staticmethod(mmap_move)
-
- def test_stringio(self):
- self.assertRaises(mmap.error, mmap_move, cBytesIO(), 0, 0, 0)
-
- def test_no_fileno(self):
- self.assertRaises(mmap.error, mmap_move, object(), 0, 0, 0)
-
-
class FileHandling(TestCase):
def file(self, contents):
temp = tempfile.TemporaryFile()
And the issue immediately went away.
I'd like to request for either:
mutagen
to detect ZFS and disable mmap
. I don't know if that's possible
- For consumers of mutagen to be able to specify, somehow, that they don't want to use
mmap
'd files