I'm trying to translate Statistical Rethinking from R and RStan to Python and PyMC3.
On page 304, there's a simple logistic regression example. Data being used (publicly available):
dept applicant.gender admit reject applications is_male
1 A male 512 313 825 1
2 A female 89 19 108 0
3 B male 353 207 560 1
4 B female 17 8 25 0
5 C male 120 205 325 1
6 C female 202 391 593 0
7 D male 138 279 417 1
8 D female 131 244 375 0
9 E male 53 138 191 1
10 E female 94 299 393 0
11 F male 22 351 373 1
12 F female 24 317 341 0
The data is stored in a pandas DataFrame (`data`). When I try to fit the model with:
with pm.Model() as m106:
alpha = pm.Normal('alpha', 0, 10)
beta_m = pm.Normal('beta_m', 0, 10)
lin = alpha + beta_m * data['is_male']
p = np.exp(lin) / (1 + np.exp(lin))
admit = pm.Binomial('admit', n=data['applications'], p=p, observed=data['admit'])
m106_map = pm.find_MAP()
m106_traces = pm.sample(1000, start=m106_map)
I get the following error (which seems similar to pymc-devs/pymc3#918):
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/type.py in dtype_specs(self)
266 'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')
--> 267 }[self.dtype]
268 except KeyError:
KeyError: 'object'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/basic.py in constant_or_value(x, rtype, name, ndim, dtype)
407 rval = rtype(
--> 408 TensorType(dtype=x_.dtype, broadcastable=bcastable),
409 x_.copy(),
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/type.py in __init__(self, dtype, broadcastable, name, sparse_grad)
49 self.broadcastable = tuple(bool(b) for b in broadcastable)
---> 50 self.dtype_specs() # error checking is done there
51 self.name = name
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/type.py in dtype_specs(self)
269 raise TypeError("Unsupported dtype for %s: %s"
--> 270 % (self.__class__.__name__, self.dtype))
271
TypeError: Unsupported dtype for TensorType: object
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/basic.py in as_tensor_variable(x, name, ndim)
201 try:
--> 202 return constant(x, name=name, ndim=ndim)
203 except TypeError:
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/basic.py in constant(x, name, ndim, dtype)
421 ret = constant_or_value(x, rtype=TensorConstant, name=name, ndim=ndim,
--> 422 dtype=dtype)
423
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/basic.py in constant_or_value(x, rtype, name, ndim, dtype)
416 except Exception:
--> 417 raise TypeError("Could not convert %s to TensorType" % x, type(x))
418
TypeError: ('Could not convert 1 Elemwise{mul,no_inplace}.0\n2 Elemwise{mul,no_inplace}.0\n3 Elemwise{mul,no_inplace}.0\n4 Elemwise{mul,no_inplace}.0\n5 Elemwise{mul,no_inplace}.0\n6 Elemwise{mul,no_inplace}.0\n7 Elemwise{mul,no_inplace}.0\n8 Elemwise{mul,no_inplace}.0\n9 Elemwise{mul,no_inplace}.0\n10 Elemwise{mul,no_inplace}.0\n11 Elemwise{mul,no_inplace}.0\n12 Elemwise{mul,no_inplace}.0\nName: applications, dtype: object to TensorType', <class 'pandas.core.series.Series'>)
During handling of the above exception, another exception occurred:
AsTensorError Traceback (most recent call last)
<ipython-input-144-fb615dfa2e93> in <module>()
7 p = np.exp(lin) / (1 + np.exp(lin))
8
----> 9 admit = pm.Binomial('admit', n=data['applications'], p=p, observed=data['admit'])
10
11 m106_map = pm.find_MAP()
/Users/horatiu/anaconda/lib/python3.5/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
24 if isinstance(name, string_types):
25 data = kwargs.pop('observed', None)
---> 26 dist = cls.dist(*args, **kwargs)
27 return model.Var(name, dist, data)
28 elif name is None:
/Users/horatiu/anaconda/lib/python3.5/site-packages/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
37 def dist(cls, *args, **kwargs):
38 dist = object.__new__(cls)
---> 39 dist.__init__(*args, **kwargs)
40 return dist
41
/Users/horatiu/anaconda/lib/python3.5/site-packages/pymc3/distributions/discrete.py in __init__(self, n, p, *args, **kwargs)
43 self.n = n
44 self.p = p
---> 45 self.mode = tt.cast(tt.round(n * p), self.dtype)
46
47 def random(self, point=None, size=None, repeat=None):
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/basic.py in round(a, mode)
2052 """round_mode(a) with mode in [half_away_from_zero, half_to_even]"""
2053 if mode == "half_away_from_zero":
-> 2054 return round_half_away_from_zero(a)
2055 elif mode == "half_to_even":
2056 return round_half_to_even(a)
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
609 """
610 return_list = kwargs.pop('return_list', False)
--> 611 node = self.make_node(*inputs, **kwargs)
612
613 if config.compute_test_value != 'off':
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/elemwise.py in make_node(self, *inputs)
541 using DimShuffle.
542 """
--> 543 inputs = list(map(as_tensor_variable, inputs))
544 shadow = self.scalar_op.make_node(
545 *[get_scalar_type(dtype=i.type.dtype).make_variable()
/Users/horatiu/anaconda/lib/python3.5/site-packages/theano/tensor/basic.py in as_tensor_variable(x, name, ndim)
206 except Exception:
207 str_x = repr(x)
--> 208 raise AsTensorError("Cannot convert %s to TensorType" % str_x, type(x))
209
210 # this has a different name, because _as_tensor_variable is the
AsTensorError: ('Cannot convert 1 Elemwise{mul,no_inplace}.0\n2 Elemwise{mul,no_inplace}.0\n3 Elemwise{mul,no_inplace}.0\n4 Elemwise{mul,no_inplace}.0\n5 Elemwise{mul,no_inplace}.0\n6 Elemwise{mul,no_inplace}.0\n7 Elemwise{mul,no_inplace}.0\n8 Elemwise{mul,no_inplace}.0\n9 Elemwise{mul,no_inplace}.0\n10 Elemwise{mul,no_inplace}.0\n11 Elemwise{mul,no_inplace}.0\n12 Elemwise{mul,no_inplace}.0\nName: applications, dtype: object to TensorType', <class 'pandas.core.series.Series'>)
The error is not very informative and doesn't seem to point to my code.
However, when I use the following instead:
with pm.Model() as m106:
alpha = pm.Normal('alpha', 0, 10)
beta_m = pm.Normal('beta_m', 0, 10)
lin = alpha + beta_m * data['is_male']
p = np.exp(lin) / (1 + np.exp(lin))
admit = pm.Binomial('admit', n=data['applications'].values, p=p, observed=data['admit'])
m106_map = pm.find_MAP()
m106_traces = pm.sample(1000, start=m106_map)
That is, when I explicitly pass `n` as a NumPy array (via `.values`) rather than as a pandas Series:
admit = pm.Binomial('admit', n=data['applications'].values, p=p, observed=data['admit'])
Everything works as expected. Why is that? Is there a reference in the documentation for this behavior? What lesson should I take away from this? :)
Versions: pandas 0.19, NumPy 1.11, PyMC3 3.0rc2.
bug