The following happens when using the issue56
branch as discussed in #56.
$ viztracer --tracer_entries 10000002 ~/code/sanbox/spline_pipeline_randomized_search.py
sys.version='3.9.10 (heads/issue56:851b1aac3e, May 2 2022, 11:56:36)\n[nogil, GCC 11.2.0]', joblib_backend_name='threading', n_jobs=8, nogil=True
Fitting 5 folds for each of 3 candidates, totalling 15 fits
Segmentation fault (core dumped)
Here is the source code of spline_pipeline_randomized_search.py
:
import sys
from time import perf_counter
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.datasets import fetch_covtype
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import SplineTransformer
from sklearn.preprocessing import PolynomialFeatures
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from joblib import parallel_backend
from threadpoolctl import threadpool_limits
nogil = getattr(sys.flags, "nogil", False)
joblib_backend_name = "threading" if nogil else "loky"
n_jobs = 8
print(f"{sys.version=}, {joblib_backend_name=!r}, {n_jobs=}, {nogil=}")
X, y = fetch_covtype(return_X_y=True, as_frame=False)
X = X[:, :5]
X_train, X_test, y_train, y_test = train_test_split(
X, y, train_size=2_000, test_size=10_000, random_state=0
)
pipeline = Pipeline(
[
("preproc", None),
("splines", SplineTransformer()),
# ("poly", PolynomialFeatures(degree=2, interaction_only=True)),
("kernel", Nystroem(n_components=300, gamma=1e-3, random_state=0)),
(
"classifier",
SGDClassifier(
early_stopping=True,
validation_fraction=0.2,
n_iter_no_change=5,
random_state=0,
),
),
]
)
param_grid = {
"preproc": [None, QuantileTransformer()],
"splines__degree": [2, 3, 4],
"splines__n_knots": [3, 5, 10],
"kernel__gamma": np.logspace(-5, 3, 100),
"kernel__n_components": [10, 50, 100, 500],
"classifier__loss": ["hinge", "log_loss"],
"classifier__alpha": np.logspace(-5, 1, 100),
}
tic = perf_counter()
with parallel_backend(joblib_backend_name, n_jobs=n_jobs):
with threadpool_limits(limits=1 if nogil else None):
search = RandomizedSearchCV(
pipeline,
param_distributions=param_grid,
n_iter=3,
verbose=1,
random_state=0,
).fit(X_train, y_train)
toc = perf_counter()
print(f"search completed in {toc - tic:.1f}s")
print(search.best_params_)
print(search.score(X_test, y_test))
Here is the gdb backtrace:
#0 _Py_IncRefShared (op=<unknown at remote 0x5555558aaec8>) at Objects/object.c:2334
#1 0x00007ffff6d59dd7 in _Py_INCREF (op=<optimized out>) at /home/ogrisel/code/nogil/Include/object.h:517
#2 snaptrace_tracefunc (obj=obj@entry=<viztracer.Tracer at remote 0x293469ca280>, frame=frame@entry=Frame 0x29349870a90, for file /home/ogrisel/code/nogil/Lib/inspect.py, line 2819, in <genexpr> (),
what=what@entry=3, arg=arg@entry=('knots', <Parameter at remote 0x29349811d20>)) at src/viztracer/modules/snaptrace.c:387
#3 0x00005555556a125f in call_profile (ts=ts@entry=0x555555b40d10, frame=Frame 0x29349870a90, for file /home/ogrisel/code/nogil/Lib/inspect.py, line 2819, in <genexpr> (), what=what@entry=3,
arg=arg@entry=('knots', <Parameter at remote 0x29349811d20>)) at Python/ceval_meta.c:3511
#4 0x00005555556ab049 in vm_profile (ts=ts@entry=0x555555b40d10, last_pc=last_pc@entry=0x293445e6890 "\\\002\002M\001W", acc=..., acc@entry=('knots', <Parameter at remote 0x29349811d20>))
at Python/ceval_meta.c:3644
#5 0x00005555556ab148 in vm_trace_handler (ts=0x555555b40d10, last_pc=0x293445e6890 "\\\002\002M\001W", acc=('knots', <Parameter at remote 0x29349811d20>)) at Python/ceval_meta.c:3778
#6 0x00005555558103c1 in _PyEval_Fast (ts=0x555555b40d10, initial_acc=Register(as_int64 = 10000003), initial_pc=0x293445e6893 "M\001W") at Python/ceval.c:2784
#7 0x00005555556a97e0 in _PyEval_Eval (pc=<optimized out>, acc=..., tstate=0x555555b40d10) at Python/ceval_meta.c:2831
#8 PyEval2_EvalGen (gen=gen@entry=0x293498f0290, opt_value=None) at Python/ceval_meta.c:2862
#9 0x00005555557c23ac in gen_send_internal (gen=0x293498f0290, opt_value=<optimized out>) at Objects/genobject.c:244
#10 0x00005555557a63b0 in PyIter_Next (iter=iter@entry=<generator at remote 0x293498f0290>) at Objects/abstract.c:2702
#11 0x00005555555fbcc1 in PyDict_MergeFromSeq2 (d=d@entry=<OrderedDict at remote 0x29349850c10>, seq2=seq2@entry=<generator at remote 0x293498f0290>, override=override@entry=1) at Objects/dictobject.c:1895
#12 0x00005555555fc178 in dict_update_arg (self=<OrderedDict at remote 0x29349850c10>, arg=<generator at remote 0x293498f0290>) at Objects/dictobject.c:1827
#13 0x00005555555fc4a9 in dict_update_common (methname=0x555555882419 "dict", kwds=0x0, args=<optimized out>, self=<OrderedDict at remote 0x29349850c10>) at Objects/dictobject.c:1841
#14 dict_init (self=<OrderedDict at remote 0x29349850c10>, args=<optimized out>, kwds=0x0) at Objects/dictobject.c:2757
#15 0x00005555556242d7 in type_call (type=<optimized out>, args=(<generator at remote 0x293498f0290>,), kwds=0x0) at Objects/typeobject.c:1044
#16 0x00005555555c5a6d in _PyObject_MakeTpCall (tstate=0x555555b40d10, callable=<type at remote 0x29343e6fc10>, args=<optimized out>, nargs=1, keywords=0x0) at Objects/call.c:191
#17 0x00005555556a4bfb in _PyObject_VectorcallTstate (kwnames=0x0, nargsf=<optimized out>, args=0x7fffccc25348, callable=<type at remote 0x29343e6fc10>, tstate=0x555555b40d10)
at ./Include/cpython/abstract.h:116
#18 _PyObject_VectorcallTstate (kwnames=0x0, nargsf=<optimized out>, args=0x7fffccc25348, callable=<type at remote 0x29343e6fc10>, tstate=0x555555b40d10) at ./Include/cpython/abstract.h:103
#19 vm_call_function (ts=0x555555b40d10, acc=...) at Python/ceval_meta.c:1341
#20 0x000055555581127b in _PyEval_Fast (ts=0x555555b40d10, initial_acc=Register(as_int64 = 10000003), initial_pc=0x293446e9e07 "F\017\001") at Python/ceval.c:982
#21 0x00005555556a8d2f in _PyEval_Eval (pc=<optimized out>, acc=..., tstate=0x555555b40d10) at Python/ceval_meta.c:2831
#22 _PyFunction_Vectorcall (func=<function at remote 0x293447750b0>, stack=0x29349801e48, nargsf=<optimized out>, kwnames=<optimized out>) at Python/ceval_meta.c:3228
#23 0x00005555555c7e68 in _PyObject_FastCallDictTstate (kwargs={'return_annotation': <type at remote 0x293446eec10>, '__validate_parameters__': False}, nargsf=2, args=0x7fffccc254c0,
callable=<function at remote 0x293447750b0>, tstate=0x555555b40d10) at Objects/call.c:129
#24 _PyObject_Call_Prepend (tstate=tstate@entry=0x555555b40d10, callable=callable@entry=<function at remote 0x293447750b0>, obj=obj@entry=<Signature at remote 0x293498209d0>,
args=args@entry=([<Parameter at remote 0x29349811a50>, <Parameter at remote 0x29349811b40>, <Parameter at remote 0x29349811c30>, <Parameter at remote 0x29349811d20>, <Parameter at remote 0x29349811e10>, <Parameter at remote 0x29349811f00>, <Parameter at remote 0x29349811eb0>],), kwargs=kwargs@entry={'return_annotation': <type at remote 0x293446eec10>, '__validate_parameters__': False}) at Objects/call.c:387
#25 0x0000555555631d84 in slot_tp_init (self=<Signature at remote 0x293498209d0>,
args=([<Parameter at remote 0x29349811a50>, <Parameter at remote 0x29349811b40>, <Parameter at remote 0x29349811c30>, <Parameter at remote 0x29349811d20>, <Parameter at remote 0x29349811e10>, <Parameter at remote 0x29349811f00>, <Parameter at remote 0x29349811eb0>],), kwds={'return_annotation': <type at remote 0x293446eec10>, '__validate_parameters__': False}) at Objects/typeobject.c:7059
#26 0x00005555556242d7 in type_call (type=<optimized out>,
args=([<Parameter at remote 0x29349811a50>, <Parameter at remote 0x29349811b40>, <Parameter at remote 0x29349811c30>, <Parameter at remote 0x29349811d20>, <Parameter at remote 0x29349811e10>, <Parameter at remote 0x29349811f00>, <Parameter at remote 0x29349811eb0>],), kwds={'return_annotation': <type at remote 0x293446eec10>, '__validate_parameters__': False}) at Objects/typeobject.c:1044
#27 0x00005555555c5a6d in _PyObject_MakeTpCall (tstate=0x555555b40d10, callable=<type at remote 0x29344574610>, args=<optimized out>, nargs=1, keywords=('return_annotation', '__validate_parameters__'))
at Objects/call.c:191
#28 0x00005555556a4981 in _PyObject_VectorcallTstate (kwnames=<optimized out>, nargsf=9223372036854775809, args=0x7fffc0000bf8, callable=<type at remote 0x29344574610>, tstate=0x555555b40d10)
at ./Include/cpython/abstract.h:116
#29 _PyObject_VectorcallTstate (kwnames=<optimized out>, nargsf=9223372036854775809, args=0x7fffc0000bf8, callable=<type at remote 0x29344574610>, tstate=0x555555b40d10) at ./Include/cpython/abstract.h:103
#30 vm_call_cfunction_slow (ts=0x555555b40d10, acc=...) at Python/ceval_meta.c:1296
#31 0x000055555581127b in _PyEval_Fast (ts=0x555555b40d10, initial_acc=Register(as_int64 = 10000003), initial_pc=0x29343d12c28 "F!\001\002K") at Python/ceval.c:982
#32 0x00005555556a8d2f in _PyEval_Eval (pc=<optimized out>, acc=..., tstate=0x555555b40d10) at Python/ceval_meta.c:2831
#33 _PyFunction_Vectorcall (func=<function at remote 0x29348a982b0>, stack=0x29349900008, nargsf=<optimized out>, kwnames=<optimized out>) at Python/ceval_meta.c:3228
#34 0x00005555555c7e68 in _PyObject_FastCallDictTstate (
kwargs={'train': <numpy.ndarray at remote 0x29349291ce0>, 'test': <numpy.ndarray at remote 0x29349291c70>, 'parameters': {'splines__n_knots': 10, 'splines__degree': 4, 'preproc': <QuantileTransformer(n_quantiles=1000, output_distribution='uniform', ignore_implicit_zeros=False, subsample=100000, random_state=None, copy=True) at remote 0x29349196550>, 'kernel__n_components': 500, 'kernel__gamma': <numpy.float64 at remote 0x29348d1f9c0>, 'classifier__loss': 'hinge', 'classifier__alpha': <numpy.float64 at remote 0x29348d1faa0>}, 'split_progress': (1, 5), 'candidate_progress': (0, 3), 'scorer': <function at remote 0x29348eca1f0>, 'fit_params': {}, 'return_train_score': False, 'return_n_test_samples': True, 'return_times': True, 'return_parameters': False, 'error_score': <float at remote 0x29345562380>, 'verbose': 1}, nargsf=4,
args=0x7fffccc25760, callable=<function at remote 0x29348a982b0>, tstate=0x555555b40d10) at Objects/call.c:129
#35 _PyObject_Call_Prepend (tstate=tstate@entry=0x555555b40d10, callable=callable@entry=<function at remote 0x29348a982b0>,
The py-bt
gdb commands returns:
Traceback (most recent call first):
File "/home/ogrisel/code/nogil/Lib/inspect.py", line 2819, in <genexpr>
params = OrderedDict((param.name, param) for param in parameters)
0x0