The device is set to "cuda"; changing it to "cpu" produces the same error when running the server:
python server.py
Traceback (most recent call last):
File "A:\xxxxxxxxx\emacs-secondmate\serve\server.py", line 10, in <module>
model = AutoModelForCausalLM.from_pretrained(modelname)
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\transformers\models\auto\auto_factory.py", line 395, in from_pretrained
return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\transformers\modeling_utils.py", line 1179, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py", line 905, in __init__
self.transformer = GPTNeoModel(config)
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py", line 708, in __init__
self.h = nn.ModuleList([GPTNeoBlock(config, layer_id=i) for i in range(config.num_layers)])
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py", line 708, in <listcomp>
self.h = nn.ModuleList([GPTNeoBlock(config, layer_id=i) for i in range(config.num_layers)])
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py", line 542, in __init__
self.mlp = GPTNeoMLP(inner_dim, config)
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\transformers\models\gpt_neo\modeling_gpt_neo.py", line 521, in __init__
self.c_fc = nn.Linear(embed_dim, intermediate_size)
File "C:\Users\xxxxx\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\torch\nn\modules\linear.py", line 81, in __init__
self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:79] data. DefaultCPUAllocator: not enough memory: you tried to allocate 104857600 bytes.