The mAP we got is only 66.95, not the 69 you reported.
mAP: 0.6695016721761877
ap of each class: plane:0.8933495268168008, baseball-diamond:0.7374626613068532, bridge:0.4122850723332845, ground-track-field:0.6670794506088387, small-vehicle:0.760291145560589, large-vehicle:0.6047573582066816, ship:0.7630074552316077, tennis-court:0.9088855421686749, basketball-court:0.7977999161008033, storage-tank:0.8171024978735246, soccer-ball-field:0.5278917624380285, roundabout:0.635699247940088, harbor:0.514493570948177, swimming-pool:0.6264466877239606, helicopter:0.3759731873849022
I don't know what went wrong.
Here are our environment info and config, copied from the training log:
2021-10-21 15:01:46,198 - mmdet - INFO - Environment info:
sys.platform: linux
Python: 3.7.7 (default, Mar 23 2020, 22:36:06) [GCC 7.3.0]
CUDA available: True
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 10.1, V10.1.243
GPU 0: GeForce RTX 2080 Ti
GCC: gcc (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
PyTorch: 1.7.1
PyTorch compiling details: PyTorch built with:
- GCC 7.3
- C++ Version: 201402
- Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
- Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- NNPACK is enabled
- CPU capability usage: AVX2
- CUDA Runtime 10.2
- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75
- CuDNN 7.6.5
- Magma 2.5.2
- Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
TorchVision: 0.8.2
OpenCV: 4.2.0
MMCV: 0.6.2
MMDetection: 2.2.0+unknown
MMDetection Compiler: GCC 7.4
MMDetection CUDA Compiler: 10.1
2021-10-21 15:01:46,199 - mmdet - INFO - Distributed training: True
2021-10-21 15:01:46,373 - mmdet - INFO - Config:
dataset_type = 'DOTADataset'
data_root = '/data/zy/datasets/split_ss_dota1_0/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadOBBAnnotations',
with_bbox=True,
with_label=True,
with_poly_as_mask=True),
dict(type='LoadDOTASpecialInfo'),
dict(type='Resize', img_scale=(1024, 1024), keep_ratio=True),
dict(type='OBBRandomFlip', h_flip_ratio=0.5, v_flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(
type='RandomOBBRotate',
rotate_after_flip=True,
angles=(0, 0),
vert_rate=0.5,
vert_cls=['roundabout', 'storage-tank']),
dict(type='Pad', size_divisor=32),
dict(type='DOTASpecialIgnore', ignore_size=2),
dict(type='FliterEmpty'),
dict(type='Mask2OBB', obb_type='obb'),
dict(type='OBBDefaultFormatBundle'),
dict(
type='OBBCollect',
keys=['img', 'gt_bboxes', 'gt_obboxes', 'gt_labels'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipRotateAug',
img_scale=[(1024, 1024)],
h_flip=False,
v_flip=False,
rotate=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='OBBRandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='RandomOBBRotate', rotate_after_flip=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='OBBCollect', keys=['img'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=4,
train=dict(
type='DOTADataset',
task='Task1',
ann_file='/data/zy/datasets/split_ss_dota1_0/trainval/annfiles/',
img_prefix='/data/zy/datasets/split_ss_dota1_0/trainval/images/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='LoadOBBAnnotations',
with_bbox=True,
with_label=True,
with_poly_as_mask=True),
dict(type='LoadDOTASpecialInfo'),
dict(type='Resize', img_scale=(1024, 1024), keep_ratio=True),
dict(type='OBBRandomFlip', h_flip_ratio=0.5, v_flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(
type='RandomOBBRotate',
rotate_after_flip=True,
angles=(0, 0),
vert_rate=0.5,
vert_cls=['roundabout', 'storage-tank']),
dict(type='Pad', size_divisor=32),
dict(type='DOTASpecialIgnore', ignore_size=2),
dict(type='FliterEmpty'),
dict(type='Mask2OBB', obb_type='obb'),
dict(type='OBBDefaultFormatBundle'),
dict(
type='OBBCollect',
keys=['img', 'gt_bboxes', 'gt_obboxes', 'gt_labels'])
]),
test=dict(
type='DOTADataset',
task='Task1',
ann_file='/data/zy/datasets/split_ss_dota1_0/test/annfiles/',
img_prefix='/data/zy/datasets/split_ss_dota1_0/test/images/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipRotateAug',
img_scale=[(1024, 1024)],
h_flip=False,
v_flip=False,
rotate=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='OBBRandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='RandomOBBRotate', rotate_after_flip=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='OBBCollect', keys=['img'])
])
]))
evaluation = None
optimizer = dict(type='SGD', lr=0.0025, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8, 11])
total_epochs = 12
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
model = dict(
type='RetinaNetOBB',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_input',
num_outs=5),
bbox_head=dict(
type='OBBRetinaHead',
num_classes=15,
in_channels=256,
stacked_convs=4,
feat_channels=256,
anchor_generator=dict(
type='Theta0AnchorGenerator',
octave_base_scale=4,
scales_per_octave=3,
ratios=[0.5, 1.0, 2.0],
strides=[8, 16, 32, 64, 128]),
bbox_coder=dict(
type='OBB2OBBDeltaXYWHTCoder',
target_means=[0.0, 0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
gpu_assign_thr=-1,
ignore_iof_thr=-1,
iou_calculator=dict(type='OBBOverlaps')),
allowed_border=-1,
pos_weight=-1,
debug=False)
test_cfg = dict(
nms_pre=2000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='obb_nms', iou_thr=0.1),
max_per_img=2000)
work_dir = './work_dirs/retinanet_obb_r50_fpn_1x_dota10'
gpu_ids = range(0, 1)