import h5py
# Convert molecules stored in the HDF5 file into serialized tf.train.Example
# records and write them to a TFRecord file.
#data = np.array(data)
dtype = np.float32
# One-hot encoding per element symbol; 'CL' and 'Cl' map to the same vector.
onehots_elements = {
    'H': np.array([1, 0, 0, 0, 0, 0, 0], dtype=dtype),
    'C': np.array([0, 1, 0, 0, 0, 0, 0], dtype=dtype),
    'N': np.array([0, 0, 1, 0, 0, 0, 0], dtype=dtype),
    'O': np.array([0, 0, 0, 1, 0, 0, 0], dtype=dtype),
    'F': np.array([0, 0, 0, 0, 1, 0, 0], dtype=dtype),
    'S': np.array([0, 0, 0, 0, 0, 1, 0], dtype=dtype),
    'CL': np.array([0, 0, 0, 0, 0, 0, 1], dtype=dtype),
    'Cl': np.array([0, 0, 0, 0, 0, 0, 1], dtype=dtype),
}
# Number of records to write before stopping (the script stops after one).
MAX_RECORDS = 1


def _tensor_feature(value):
    """Serialize `value` with tf.io.serialize_tensor and wrap it in a bytes Feature."""
    serialized = tf.io.serialize_tensor(value).numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[serialized]))


count = 0
# Both resources are context-managed so the HDF5 handle is released and the
# TFRecord file is flushed and closed before anything later reads it back.
with h5py.File("data.hdf5", "r") as data:
    with tf.io.TFRecordWriter("./tfrecord_1011") as writer:
        for key in data:  # Iterates over each Unique Identifier
            coordinates = data[key]['coordinates'][()]
            elements = data[key]['elements'][()]
            monopoles = data[key]['monopoles'][()]
            dipoles = data[key]['dipoles'][()]
            quadrupoles = data[key]['quadrupoles'][()]
            #print("element,type",elements)
            elements = np.char.decode(elements, encoding="utf-8")
            # Not consumed below; the lookup raises KeyError for any symbol
            # missing from onehots_elements.
            tensor = [onehots_elements[e] for e in elements]
            graphs = build_graph(coordinates, elements, cutoff=4.0, num_kernels=32)
            # The raw HDF5 arrays are cast to `dtype` (float32) because the
            # reader parses these fields as tf.float32 and tf.io.parse_tensor
            # requires out_type to match the serialized dtype.
            # NOTE(review): the graph fields are written as build_graph returns
            # them; confirm their dtypes match what the reader expects.
            batch = {
                'nodes': _tensor_feature(graphs.nodes),
                'edges': _tensor_feature(graphs.edges),
                'coordinates': _tensor_feature(np.asarray(coordinates, dtype=dtype)),
                'n_node': _tensor_feature(graphs.n_node),
                'n_edge': _tensor_feature(graphs.n_edge),
                'senders': _tensor_feature(graphs.senders),
                'receivers': _tensor_feature(graphs.receivers),
                'monopoles': _tensor_feature(np.asarray(monopoles, dtype=dtype)),
                'dipoles': _tensor_feature(np.asarray(dipoles, dtype=dtype)),
                'quadrupoles': _tensor_feature(np.asarray(quadrupoles, dtype=dtype)),
            }
            example = tf.train.Example(features=tf.train.Features(feature=batch)).SerializeToString()
            writer.write(example)
            count += 1
            if count >= MAX_RECORDS:
                break
# NOTE(review): the indentation of this line was lost in the source; it is
# placed after the loop so it runs once when writing has finished.
print("go on")
# dtype used when decoding the floating-point tensors of a record.
dtype_record = tf.float32


def load_data(record):
    """Decode one serialized tf.train.Example into a graph and its targets.

    Args:
        record: a serialized example, parsed with the module-level
            `feature_description`.

    Returns:
        A tuple `(graph, coords, monopoles, dipoles, quadrupoles)` where
        `graph` is a `gn.graphs.GraphsTuple` with `globals=None` and
        `quadrupoles` is the decoded tensor passed through `D_Q`.

    NOTE(review): the tensors come from tf.io.parse_tensor, whose results
    carry no static shape; add tf.ensure_shape / set_shape here if downstream
    code needs known shapes -- confirm the expected ranks first.
    """
    batch = tf.io.parse_single_example(record, feature_description)

    def _decode(name, out_type=dtype_record):
        # Deserialize the bytes feature stored under `name`.
        return tf.io.parse_tensor(batch[name], out_type=out_type)

    graph = gn.graphs.GraphsTuple(
        nodes=_decode('nodes'),
        edges=_decode('edges'),
        globals=None,
        receivers=_decode('receivers', tf.int32),
        senders=_decode('senders', tf.int32),
        n_node=_decode('n_node', tf.int32),
        n_edge=_decode('n_edge', tf.int32),
    )
    coords = _decode('coordinates')
    monopoles = _decode('monopoles')
    dipoles = _decode('dipoles')
    quadrupoles = D_Q(_decode('quadrupoles'))
    return graph, coords, monopoles, dipoles, quadrupoles
# Path of the TFRecord file produced by the writer step.
DATASET_FOLDER = "./tfrecord_1011"
import json
from google.protobuf.json_format import MessageToJson

# Debug dump: decode every record as a tf.train.Example, convert it to a
# plain dict via JSON, and print the feature names together with their values.
dataset = tf.data.TFRecordDataset("./tfrecord_1011")
for raw_record in dataset:
    proto = tf.train.Example()
    proto.ParseFromString(raw_record.numpy())
    decoded = json.loads(MessageToJson(proto))
    feature_map = decoded['features']['feature']
    print(feature_map.keys(), feature_map.values())
# Build the input pipeline over the TFRecord file.
# DATASET_FOLDER has no "{}" placeholder, so .format(x) returns it unchanged,
# and np.random.choice(1, 1, replace=False) always yields [0]; the list
# therefore contains the single file path.
dataset = tf.data.TFRecordDataset(
    [DATASET_FOLDER.format(x) for x in np.random.choice(1, 1, replace=False)],
    num_parallel_reads=2,
)
# The chain is parenthesised so that it is one expression; a line starting
# with ".method()" is otherwise a syntax error.
# NOTE(review): ignore_errors() silently drops elements whose load_data call
# fails; together with repeat(), a file in which every record fails to parse
# would iterate forever without yielding anything.
dataset = (
    dataset
    .repeat()
    .map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
    .apply(tf.data.experimental.ignore_errors())
    .shuffle(32, reshuffle_each_iteration=True)
)
# Bare expression: displays the dataset's element_spec in a notebook/REPL.
dataset
<ShuffleDataset element_spec=(GraphsTuple(nodes=TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), edges=TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), receivers=TensorSpec(shape=<unknown>, dtype=tf.int32, name=None), senders=TensorSpec(shape=<unknown>, dtype=tf.int32, name=None), globals=NoneTensorSpec(), n_node=TensorSpec(shape=<unknown>, dtype=tf.int32, name=None), n_edge=TensorSpec(shape=<unknown>, dtype=tf.int32, name=None)), TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=<unknown>, dtype=tf.float32, name=None), TensorSpec(shape=<unknown>, dtype=tf.float32, name=None))>
Is there something wrong with my code? Why is every shape equal to <unknown>?