Runtime Error on Training Custom Dataset
❓ Questions & Help
Hi everyone, I am trying to train a simple GNN on my own dataset. I created the dataset following the Creating Your Own Datasets tutorial; in my case, nodes with the same label are connected to each other. To build the GNN, I used the enzymes_topk_pool.py example. Here is the error I get:
```
RuntimeError: CUDA error: device-side assert triggered
The above operation failed in interpreter.
Traceback (most recent call last):
  File "/home/rog/anaconda3/envs/py36_pytorch/lib/python3.6/site-packages/torch_scatter/scatter.py", line 39
    else:
        size[dim] = int(index.max()) + 1
        out = torch.zeros(size, dtype=src.dtype, device=src.device)
              ~~~~~~~~~~~ <--- HERE
        return out.scatter_add_(dim, index, src)
    else:
```
I also tried to train with `device = torch.device('cpu')`; it produces the error below:
```
RuntimeError: index out of range: Tried to access index 11 out of table with 3 rows. at /opt/conda/conda-bld/pytorch_1579022034529/work/aten/src/TH/generic/THTensorEvenMoreMath.cpp:418
```
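Both messages point at the same underlying problem: an index tensor referring to a row that does not exist. A minimal illustration (a sketch, not part of the original report) of how an out-of-range entry in `edge_index` triggers this class of error:

```python
# Illustrative sketch: indexing a 3-node feature matrix with a node id
# that does not exist raises an out-of-bounds error -- a device-side
# assert on CUDA, a plain "index out of range" on CPU.
import torch

x = torch.randn(3, 8)                         # a graph with 3 nodes
edge_index = torch.tensor([[0, 2], [1, 11]])  # node 11 does not exist
x[edge_index[1]]                              # raises the indexing error
```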
Here is what I have tried:
```python
import json
import torch
from torch_geometric.data import InMemoryDataset, Data
from torch_geometric.data import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
import torch.nn.functional as F
import os.path as osp

NUM_CLASSES = 15
ROOT_PATH = "./data/"


################################ Create Dataset ################################
class GraphInputDataset(InMemoryDataset):
    def __init__(self, root, train, transform=None, pre_transform=None):
        super(GraphInputDataset, self).__init__(root, transform, pre_transform)
        path = self.processed_paths[0] if train else self.processed_paths[1]
        self.data, self.slices = torch.load(path)

    # The names of the files to find in the self.raw_dir folder in order to skip the download.
    @property
    def raw_file_names(self):
        return ['sub_{}_edges.txt'.format(file_name) for file_name in ['train', 'test']]

    # A list of files in the processed_dir which needs to be found in order to skip the processing.
    @property
    def processed_file_names(self):
        return ['train.pt', 'test.pt']

    def download(self):
        pass

    def process(self):
        for split, processed_path in zip(['train', 'test'], self.processed_paths):
            node_file_path = osp.join(self.root, 'sub_{}_gcn_dataset.txt'.format(split))
            edge_file_path = osp.join(self.root, 'sub_{}_edges.txt'.format(split))
            # Read data into huge `Data` list.
            is_data_fetched, data_list = read_dataset(node_file_path, edge_file_path)
            if is_data_fetched:
                if self.pre_filter is not None:
                    data_list = [data for data in data_list if self.pre_filter(data)]
                if self.pre_transform is not None:
                    data_list = [self.pre_transform(data) for data in data_list]
                data, slices = self.collate(data_list)
                torch.save((data, slices), processed_path)


def read_dataset(node_file_path, edge_file_path):
    """
    Reads the given dataset file, which includes all data samples (train and
    test sets together), and the edge file, which contains relations between
    nodes, where two nodes are connected if they belong to the same class.
    Returns the resulting undirected graph.
    """
    with open(node_file_path) as node_file:  # Read node file.
        node_data = json.load(node_file)
    with open(edge_file_path) as edge_file:  # Read edge file.
        edge_data = json.load(edge_file)
    # Check number of classes.
    if len(edge_data) != NUM_CLASSES:
        print("Classes length", len(edge_data))
        return False, []
    else:
        data_list = []
        # Fetch each sample separately.
        for sample in node_data:
            node_id = sample['nodeId']
            features = torch.FloatTensor(sample['features'])  # 1D, convert to 2D.
            features = features.view(1, features.shape[0])  # Data expects [num_nodes, num_node_features].
            label = sample['label'] - 1  # -1 since classes start from 1.
            raw_edges = edge_data[label][str(label + 1)]  # Edges include the id of the node being processed, so remove it.
            e_from = []
            e_to = []
            for neighbour_id in raw_edges:
                if neighbour_id != node_id:
                    e_from.append(node_id)
                    e_to.append(neighbour_id)
            edge_index = torch.LongTensor([e_from, e_to])
            data = Data(x=features, y=torch.LongTensor([label]), edge_index=edge_index)
            data_list.append(data)
        return True, data_list


# Create dataset.
train_dataset = GraphInputDataset(ROOT_PATH, train=True)
test_dataset = GraphInputDataset(ROOT_PATH, train=False)

# Check number of classes & features of nodes in datasets.
assert train_dataset.num_classes == test_dataset.num_classes
assert train_dataset.num_features == test_dataset.num_features


####################################### Build a GNN #######################################
# See https://github.com/rusty1s/pytorch_geometric/blob/master/examples/enzymes_topk_pool.py
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GraphConv(train_dataset.num_features, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)
        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, train_dataset.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)
        x = F.relu(self.conv2(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)
        x = F.relu(self.conv3(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool3(x, edge_index, None, batch)
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)
        x = x1 + x2 + x3
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.log_softmax(self.lin3(x), dim=-1)
        return x


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
print("device:", device)
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)


def train(epoch):
    model.train()
    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, data.y)
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    return loss_all / len(train_dataset)


def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        pred = model(data).max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)


train_loader = DataLoader(train_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

for epoch in range(1, 201):
    loss = train(epoch)
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'.format(
        epoch, loss, train_acc, test_acc))
```
Here are the files to reproduce the same error: sub_train_gcn_dataset.txt, sub_train_edges.txt, sub_test_gcn_dataset.txt, sub_test_edges.txt
Top GitHub Comments
There is an index error going on. Eventually, you have an invalid entry in `edge_index` that is greater than or equal to `x.size(0)`.
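A minimal way to confirm this (a sketch, not from the original thread, using the dataset defined above) is to scan each graph for out-of-bounds entries:

```python
# Sanity-check sketch: report every graph whose edge_index refers to a row
# that does not exist in that graph's node feature matrix x. This is the
# exact condition behind the scatter/indexing errors above.
for i, data in enumerate(train_dataset):
    if data.edge_index.numel() > 0 and int(data.edge_index.max()) >= data.x.size(0):
        print('Graph {}: edge_index.max() = {}, x.size(0) = {}'.format(
            i, int(data.edge_index.max()), data.x.size(0)))
```

In the dataset above this should flag nearly every graph: each `Data` object holds a single node (`x` has shape `[1, num_features]`), while its `edge_index` stores dataset-wide node ids rather than row indices into that graph's `x`.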
Thanks a lot for your patience and great work, I'll give them a try.
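For completeness, one common way to satisfy that constraint (a sketch, not from the thread; `samples` and `edges` are hypothetical inputs shaped like the JSON files above) is to remap arbitrary node ids to local row indices before building `edge_index`:

```python
import torch
from torch_geometric.data import Data

# Sketch: map arbitrary node ids to local rows 0..N-1 so that every
# edge_index entry is a valid row of x. `samples` is a hypothetical list
# of dicts like node_data; `edges` is a list of (nodeId, nodeId) pairs.
def build_graph(samples, edges):
    id_to_row = {s['nodeId']: i for i, s in enumerate(samples)}
    x = torch.FloatTensor([s['features'] for s in samples])
    y = torch.LongTensor([s['label'] - 1 for s in samples])  # per-node labels; adapt to the task
    e_from, e_to = [], []
    for src, dst in edges:
        if src in id_to_row and dst in id_to_row:
            e_from.append(id_to_row[src])
            e_to.append(id_to_row[dst])
    return Data(x=x, y=y, edge_index=torch.LongTensor([e_from, e_to]))
```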