Nice to meet you.
I am a student learning with your package.
I have run into a problem that I cannot solve by myself.
I tried your tutorial with the features from this site (https://qiita.com/Hironsan/items/326b66711eb4196aa9d4), and added cross-validation as follows:
from sklearn.model_selection import cross_validate
scores = cross_validate(crf, X, y, scoring="f1_macro", cv=5)
print(scores.test_score)
However, the following error occurs.
Traceback (most recent call last):
File "/program/crf.py", line 41, in <module>
scores = cross_validate(crf, X, y, scoring="f1_macro", cv=5)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 195, in cross_validate
for train, test in cv.split(X, y, groups))
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
while self.dispatch_one_batch(iterator):
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
self._dispatch(tasks)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 111, in apply_async
result = ImmediateResult(func)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 332, in __init__
self.results = batch()
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 467, in _fit_and_score
test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 502, in _score
return _multimetric_score(estimator, X_test, y_test, scorer)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 532, in _multimetric_score
score = scorer(estimator, X_test, y_test)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/metrics/scorer.py", line 108, in __call__
**self._kwargs)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 714, in f1_score
sample_weight=sample_weight)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 828, in fbeta_score
sample_weight=sample_weight)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 1025, in precision_recall_fscore_support
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/metrics/classification.py", line 72, in _check_targets
type_true = type_of_target(y_true)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/utils/multiclass.py", line 259, in type_of_target
raise ValueError('You appear to be using a legacy multi-label data'
ValueError: You appear to be using a legacy multi-label data representation. Sequence of sequences are no longer supported; use a binary array or sparse matrix instead.
So, I added the following before the cross-validation:
trans_X = []
mlb = MultiLabelBinarizer()
for x in X:
x = mlb.fit_transform(x)
trans_X.append(x.astype(bytes))
X = trans_X
y = MultiLabelBinarizer().fit_transform(y)
y = y.astype(bytes)
However, the following error occurs.
Traceback (most recent call last):
File "/program/crf.py", line 41, in <module>
scores = cross_validate(crf, X, y, scoring="f1_macro", cv=5)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 195, in cross_validate
for train, test in cv.split(X, y, groups))
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
while self.dispatch_one_batch(iterator):
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
self._dispatch(tasks)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 111, in apply_async
result = ImmediateResult(func)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 332, in __init__
self.results = batch()
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn/model_selection/_validation.py", line 437, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/sklearn_crfsuite/estimator.py", line 314, in fit
trainer.append(xseq, yseq)
File "pycrfsuite/_pycrfsuite.pyx", line 312, in pycrfsuite._pycrfsuite.BaseTrainer.append
ValueError: The numbers of items and labels differ: |x| = 62, |y| = 3
Please tell me how to solve this problem.
I am sorry to bother you when you are busy, but I would really appreciate your help.