I just installed the library and tried running the tutorial from the docs, using the following code:
import pandas as pd
from deduplipy.datasets import load_data
df = load_data()
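This raised the following error. Judging by the final `FileNotFoundError`, the bundled data file `deduplipy/data/voter_names.csv` appears to be missing from the installed package: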
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
/tmp/ipykernel_3092214/1389887512.py in <module>
----> 1 df = load_data()
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/deduplipy/datasets.py in load_data(kind)
36 return load_stoxx50()
37 elif kind == 'voters':
---> 38 return load_voters()
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/deduplipy/datasets.py in load_voters()
14 def load_voters() -> pd.DataFrame:
15 file_path = resource_filename('deduplipy', os.path.join('data', 'voter_names.csv'))
---> 16 df = pd.read_csv(file_path)
17 print("Column names: 'name', 'suburb', 'postcode'")
18 return df
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
584 kwds.update(kwds_defaults)
585
--> 586 return _read(filepath_or_buffer, kwds)
587
588
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
480
481 # Create the parser.
--> 482 parser = TextFileReader(filepath_or_buffer, **kwds)
483
484 if chunksize or iterator:
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
809 self.options["has_index_names"] = kwds["has_index_names"]
810
--> 811 self._engine = self._make_engine(self.engine)
812
813 def close(self):
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/io/parsers/readers.py in _make_engine(self, engine)
1038 )
1039 # error: Too many arguments for "ParserBase"
-> 1040 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
1041
1042 def _failover_to_python(self):
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/io/parsers/c_parser_wrapper.py in __init__(self, src, **kwds)
49
50 # open handles
---> 51 self._open_handles(src, kwds)
52 assert self.handles is not None
53
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/io/parsers/base_parser.py in _open_handles(self, src, kwds)
227 memory_map=kwds.get("memory_map", False),
228 storage_options=kwds.get("storage_options", None),
--> 229 errors=kwds.get("encoding_errors", "strict"),
230 )
231
~/Development/calm-notebooks/venv/lib/python3.7/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
704 encoding=ioargs.encoding,
705 errors=errors,
--> 706 newline="",
707 )
708 else:
FileNotFoundError: [Errno 2] No such file or directory: '/home/vincent/Development/calm-notebooks/venv/lib/python3.7/site-packages/deduplipy/data/voter_names.csv'
Here is my environment and version information (output of the `watermark` extension):
Python implementation: CPython
Python version : 3.7.9
IPython version : 7.27.0
numpy : 1.20.3
pandas : 1.3.2
scikit-learn: 0.24.2
deduplipy : 0.5
Compiler : GCC 9.3.0
OS : Linux
Release : 5.11.0-7614-generic
Machine : x86_64
Processor : x86_64
CPU cores : 12
Architecture: 64bit