First of all, thank you for this tool, which seems to be very useful for automating scientific workflows. I am using doit to automate a training-evaluation workflow that contains many embarrassingly parallel tasks, so the -n <NUM_JOB>
option is exactly what I want. Unfortunately, there are some pickling issues when using it on Windows.
My software stack:
configparser 3.3.0.post2
doit 0.28.0
pip 6.1.1
python 2.7.9
setuptools 15.1
six 1.9.0
dodo.py
(in my application, Python scripts are started instead of the echo
commands, but the parallel execution behavior is the same):
# -*- coding: utf-8 -*-
import os
import os.path as osp
from doit import tools
# Names of the feature configurations; one extraction sub-task is generated
# for each entry.
FEATURES = [
    "lbp_small",
    "lbp_medium",
    "lbp_72angles",
    "hog_normalised",
    "hog_default",
    "daisy_default",
    "hog_single_cell",
]

# Root output directory; every entry in ``paths`` lives below it.
OUT = "out"

# Output sub-directories, keyed by a symbolic name.
paths = {
    "OUT_FEATURES": osp.join(OUT, "features"),
    "OUT_EVALUATION": osp.join(OUT, "evaluation"),
    "OUT_FIGURES": osp.join(OUT, "figures"),
}
def task_feature_extraction():
    """Yield one doit sub-task dictionary per entry in ``FEATURES``.

    For each feature, the sub-task's single action is an ``echo`` shell
    command whose output is redirected into the feature's ``.hdf5`` path
    under ``paths["OUT_FEATURES"]``. That same path serves as the
    sub-task's name and as its only target.
    """
    for feature_name in FEATURES:
        spec_name = "feat_{}.json".format(feature_name)
        target_path = osp.join(paths["OUT_FEATURES"],
                               "feat_{}.hdf5".format(feature_name))
        command = "echo extract %s > %s" % (spec_name, target_path)
        subtask = {
            "name": target_path,
            "actions": [command],
            "targets": [target_path],
            "clean": True,
            # force doit to always mark the task
            # as up-to-date (unless target removed)
            'uptodate': [True],
        }
        yield subtask
Command: doit -n 4
causes traceback:
Traceback (most recent call last):
File "C:\Anaconda\envs\surface-classification\lib\site-packages\doit\doit_cmd.py", line 165, in run
return command.parse_execute(args)
File "C:\Anaconda\envs\surface-classification\lib\site-packages\doit\cmd_base.py", line 122, in parse_execute
return self.execute(params, args)
File "C:\Anaconda\envs\surface-classification\lib\site-packages\doit\cmd_base.py", line 405, in execute
return self._execute(**exec_params)
File "C:\Anaconda\envs\surface-classification\lib\site-packages\doit\cmd_run.py", line 239, in _execute
return runner.run_all(self.control.task_dispatcher())
File "C:\Anaconda\envs\surface-classification\lib\site-packages\doit\runner.py", line 238, in run_all
self.run_tasks(task_dispatcher)
File "C:\Anaconda\envs\surface-classification\lib\site-packages\doit\runner.py", line 417, in run_tasks
proc_list = self._run_start_processes(job_q, result_q)
File "C:\Anaconda\envs\surface-classification\lib\site-packages\doit\runner.py", line 390, in _run_start_processes
process.start()
File "C:\Anaconda\envs\surface-classification\lib\multiprocessing\process.py", line 130, in start
self._popen = Popen(self)
File "C:\Anaconda\envs\surface-classification\lib\multiprocessing\forking.py", line 277, in __init__
dump(process_obj, to_child, HIGHEST_PROTOCOL)
File "C:\Anaconda\envs\surface-classification\lib\multiprocessing\forking.py", line 199, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 224, in dump
self.save(obj)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 649, in save_dict
self._batch_setitems(obj.iteritems())
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 681, in _batch_setitems
save(v)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\multiprocessing\forking.py", line 67, in dispatcher
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 401, in save_reduce
save(args)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 548, in save_tuple
save(element)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 649, in save_dict
self._batch_setitems(obj.iteritems())
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 681, in _batch_setitems
save(v)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 649, in save_dict
self._batch_setitems(obj.iteritems())
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 681, in _batch_setitems
save(v)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\multiprocessing\forking.py", line 67, in dispatcher
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 401, in save_reduce
save(args)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 548, in save_tuple
save(element)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 649, in save_dict
self._batch_setitems(obj.iteritems())
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 681, in _batch_setitems
save(v)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 419, in save_reduce
save(state)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 649, in save_dict
self._batch_setitems(obj.iteritems())
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 681, in _batch_setitems
save(v)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 331, in save
self.save_reduce(obj=obj, *rv)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 396, in save_reduce
save(cls)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 748, in save_global
(obj, module, name))
PicklingError: Can't pickle <type 'DB'>: it's not found as __builtin__.DB
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Anaconda\envs\surface-classification\lib\multiprocessing\forking.py", line 381, in main
self = load(from_parent)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 1378, in load
return Unpickler(file).load()
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 858, in load
dispatch[key](self)
File "C:\Anaconda\envs\surface-classification\lib\pickle.py", line 880, in load_eof
raise EOFError
EOFError
Exception AttributeError: "'_DBWithCursor' object has no attribute 'dbc'" in ignored
If you need further details, please contact me.
Windows X-platform WIP