Thank you for your great work! As the title describes, I used your pretrained model and followed the Semantic FPN code from PVT, as mentioned in the paper. I trained with two GPUs (Tesla V100), but the resulting mIoU is 46.22, which is about 2 points lower than the paper. Could you please release the training log, so it can help me find what went wrong? I would really appreciate that.
This is part of my log:
2022-12-03 20:56:59,810 - mmseg - INFO - Distributed training: True
2022-12-03 20:57:00,402 - mmseg - INFO - Config:
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='EncoderDecoder',
pretrained=
'"/home/PVT/segmentation/pretrained/SSA/ckpt_S.pth"',
backbone=dict(
type='shunted_s',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 1, 1),
strides=(1, 2, 2, 2),
norm_cfg=dict(type='SyncBN', requires_grad=True),
norm_eval=False,
style='pytorch',
contract_dilation=True),
neck=dict(
type='FPN',
in_channels=[64, 128, 256, 512],
out_channels=256,
num_outs=4),
decode_head=dict(
type='FPNHead',
in_channels=[256, 256, 256, 256],
in_index=[0, 1, 2, 3],
feature_strides=[4, 8, 16, 32],
channels=128,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=dict(type='SyncBN', requires_grad=True),
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
train_cfg=dict(),
test_cfg=dict(mode='whole'))
dataset_type = 'ADE20KDataset'
data_root = '/home/ADEChallengeData2016'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 512),
flip=False,
transforms=[
dict(type='AlignResize', keep_ratio=True, size_divisor=32),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type='RepeatDataset',
times=50,
dataset=dict(
type='ADE20KDataset',
data_root=
'/home/ADEChallengeData2016',
img_dir='images/training',
ann_dir='annotations/training',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='Resize',
img_scale=(2048, 512),
ratio_range=(0.5, 2.0)),
dict(
type='RandomCrop',
crop_size=(512, 512),
cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
])),
val=dict(
type='ADE20KDataset',
data_root=
'/home/ADEChallengeData2016',
img_dir='images/validation',
ann_dir='annotations/validation',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 512),
flip=False,
transforms=[
dict(type='AlignResize', keep_ratio=True, size_divisor=32),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]),
test=dict(
type='ADE20KDataset',
data_root=
'/home/ADEChallengeData2016',
img_dir='images/validation',
ann_dir='annotations/validation',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(2048, 512),
flip=False,
transforms=[
dict(type='AlignResize', keep_ratio=True, size_divisor=32),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]))
log_config = dict(
interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = '/home/PVT/segmentation/pretrained/SSA/shunted_s_v.pth'
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
gpu_multiples = 1
optimizer = dict(type='AdamW', lr=0.0001, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0, by_epoch=False)
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=8000)
evaluation = dict(interval=8000, metric='mIoU')
device = 'cuda'
work_dir = 'work_dirs_shunted'
gpu_ids = range(0, 1)
2022-12-03 20:57:01,314 - mmseg - INFO - EncoderDecoder(
(backbone): shunted_s(
(patch_embed1): Head(
(conv): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(64, 64, kernel_size=(2, 2), stride=(2, 2))
)
(norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
)
(block1): ModuleList(
(0): Block(
(norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=64, out_features=64, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=64, out_features=64, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(64, 64, kernel_size=(4, 4), stride=(4, 4))
(norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=64, out_features=64, bias=True)
(kv2): Linear(in_features=64, out_features=64, bias=True)
(local_conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
(local_conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
)
(drop_path): Identity()
(norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=64, out_features=512, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=512, out_features=64, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): Block(
(norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=64, out_features=64, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=64, out_features=64, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(64, 64, kernel_size=(4, 4), stride=(4, 4))
(norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=64, out_features=64, bias=True)
(kv2): Linear(in_features=64, out_features=64, bias=True)
(local_conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
(local_conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
)
(drop_path): Identity()
(norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=64, out_features=512, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=512, out_features=64, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
(patch_embed2): OverlapPatchEmbed(
(proj): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
)
(block2): ModuleList(
(0): Block(
(norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=128, out_features=128, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=128, out_features=128, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
(norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(128, 128, kernel_size=(2, 2), stride=(2, 2))
(norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=128, out_features=128, bias=True)
(kv2): Linear(in_features=128, out_features=128, bias=True)
(local_conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
(local_conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
)
(drop_path): Identity()
(norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=128, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=128, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): Block(
(norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=128, out_features=128, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=128, out_features=128, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
(norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(128, 128, kernel_size=(2, 2), stride=(2, 2))
(norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=128, out_features=128, bias=True)
(kv2): Linear(in_features=128, out_features=128, bias=True)
(local_conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
(local_conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
)
(drop_path): Identity()
(norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=128, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=128, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(2): Block(
(norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=128, out_features=128, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=128, out_features=128, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
(norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(128, 128, kernel_size=(2, 2), stride=(2, 2))
(norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=128, out_features=128, bias=True)
(kv2): Linear(in_features=128, out_features=128, bias=True)
(local_conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
(local_conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
)
(drop_path): Identity()
(norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=128, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=128, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(3): Block(
(norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=128, out_features=128, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=128, out_features=128, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
(norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(128, 128, kernel_size=(2, 2), stride=(2, 2))
(norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=128, out_features=128, bias=True)
(kv2): Linear(in_features=128, out_features=128, bias=True)
(local_conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
(local_conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
)
(drop_path): Identity()
(norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=128, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=128, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
(patch_embed3): OverlapPatchEmbed(
(proj): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(block3): ModuleList(
(0): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(1): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(2): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(3): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(4): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(5): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(6): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(7): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(8): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(9): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(10): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
(11): Block(
(norm1): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=256, out_features=256, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=256, out_features=256, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(act): GELU(approximate='none')
(sr1): Conv2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(sr2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(kv1): Linear(in_features=256, out_features=256, bias=True)
(kv2): Linear(in_features=256, out_features=256, bias=True)
(local_conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
(local_conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128)
)
(drop_path): Identity()
(norm2): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=256, out_features=1024, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(norm3): LayerNorm((256,), eps=1e-06, elementwise_affine=True)
(patch_embed4): OverlapPatchEmbed(
(proj): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(block4): ModuleList(
(0): Block(
(norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
(attn): Attention(
(q): Linear(in_features=512, out_features=512, bias=True)
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Linear(in_features=512, out_features=512, bias=True)
(proj_drop): Dropout(p=0.0, inplace=False)
(kv): Linear(in_features=512, out_features=1024, bias=True)
(local_conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
)
(drop_path): Identity()
(norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
(mlp): Mlp(
(fc1): Linear(in_features=512, out_features=2048, bias=True)
(dwconv): DWConv(
(dwconv): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
)
(act): GELU(approximate='none')
(fc2): Linear(in_features=2048, out_features=512, bias=True)
(drop): Dropout(p=0.0, inplace=False)
)
)
)
(norm4): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
)
(neck): FPN(
(lateral_convs): ModuleList(
(0): ConvModule(
(conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
)
(1): ConvModule(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
)
(2): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
)
(3): ConvModule(
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
)
)
(fpn_convs): ModuleList(
(0): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(1): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(2): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(3): ConvModule(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
)
)
init_cfg={'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
(decode_head): FPNHead(
input_transform=multiple_select, ignore_index=255, align_corners=False
(loss_decode): CrossEntropyLoss(avg_non_ignore=False)
(conv_seg): Conv2d(128, 150, kernel_size=(1, 1), stride=(1, 1))
(dropout): Dropout2d(p=0.1, inplace=False)
(scale_heads): ModuleList(
(0): Sequential(
(0): ConvModule(
(conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activate): ReLU(inplace=True)
)
)
(1): Sequential(
(0): ConvModule(
(conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activate): ReLU(inplace=True)
)
(1): Upsample()
)
(2): Sequential(
(0): ConvModule(
(conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activate): ReLU(inplace=True)
)
(1): Upsample()
(2): ConvModule(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activate): ReLU(inplace=True)
)
(3): Upsample()
)
(3): Sequential(
(0): ConvModule(
(conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activate): ReLU(inplace=True)
)
(1): Upsample()
(2): ConvModule(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activate): ReLU(inplace=True)
)
(3): Upsample()
(4): ConvModule(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activate): ReLU(inplace=True)
)
(5): Upsample()
)
)
)
init_cfg={'type': 'Normal', 'std': 0.01, 'override': {'name': 'conv_seg'}}
)
……
2022-12-04 06:05:23,095 - mmseg - INFO - Iter [72050/80000] lr: 1.252e-05, eta: 1:00:25, time: 6.361, data_time: 5.941, memory: 17214, decode.loss_ce: 0.2797, decode.acc_seg: 88.8924, loss: 0.2797
2022-12-04 06:05:44,463 - mmseg - INFO - Iter [72100/80000] lr: 1.245e-05, eta: 1:00:02, time: 0.427, data_time: 0.006, memory: 17214, decode.loss_ce: 0.3011, decode.acc_seg: 88.9319, loss: 0.3011
2022-12-04 06:06:06,310 - mmseg - INFO - Iter [72150/80000] lr: 1.238e-05, eta: 0:59:39, time: 0.437, data_time: 0.016, memory: 1
2022-12-04 07:04:18,659 - mmseg - INFO - Summary:
2022-12-04 07:04:18,659 - mmseg - INFO -
+-------+-------+-------+
| aAcc | mIoU | mAcc |
+-------+-------+-------+
| 82.02 | 46.22 | 58.14 |
+-------+-------+-------+