dist_url='tcp://127.0.0.1:49152', eval_only=False, machine_rank=0, num_gpus=1, num_machines=1, opts=[], resume=False)
[08/25 11:15:41 detectron2]: Contents of args.config_file=projects/ISTR/configs/ISTR-AE-R50-3x.yaml:
BASE: "Base-ISTR.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
RESNETS:
DEPTH: 50
STRIDE_IN_1X1: False
ISTR:
NUM_PROPOSALS: 300
NUM_CLASSES: 5
MASK_ENCODING_METHOD: "AE"
PATH_COMPONENTS: "/content/drive/MyDrive/imenselmi/ISTR_TRAIN/ISTR/projects/AE/checkpoints/AE_112_256.t7"
DATASETS:
TRAIN: ("train",)
TEST: ("val",)
SOLVER:
STEPS: (210000, 250000)
MAX_ITER: 270000
INPUT:
FORMAT: "RGB"
[08/25 11:15:41 detectron2]: Running with full config:
CUDNN_BENCHMARK: true
DATALOADER:
ASPECT_RATIO_GROUPING: true
FILTER_EMPTY_ANNOTATIONS: true
NUM_WORKERS: 4
REPEAT_THRESHOLD: 0.0
SAMPLER_TRAIN: TrainingSampler
DATASETS:
PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
PROPOSAL_FILES_TEST: []
PROPOSAL_FILES_TRAIN: []
TEST:
- val
TRAIN:
- train
GLOBAL:
HACK: 1.0
INPUT:
CROP:
ENABLED: true
SIZE:
- 384
- 600
TYPE: absolute_range
FORMAT: RGB
MASK_FORMAT: polygon
MAX_SIZE_TEST: 1333
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TEST: 800
MIN_SIZE_TRAIN:
- 416
- 448
- 480
- 512
- 544
- 576
- 608
- 640
- 672
- 704
- 736
- 768
- 800
- 832
- 864
- 896
- 928
- 960
- 992
- 1024
- 1056
- 1088
MIN_SIZE_TRAIN_SAMPLING: choice
RANDOM_FLIP: horizontal
LSJ_AUG: false
MODEL:
ANCHOR_GENERATOR:
ANGLES:
-
-
- 0.5
- 1.0
- 2.0
NAME: DefaultAnchorGenerator
OFFSET: 0.0
SIZES:
-
- 32
- 64
- 128
- 256
- 512
BACKBONE:
FREEZE_AT: -1
NAME: build_resnet_fpn_backbone
DEVICE: cuda
FPN:
FUSE_TYPE: sum
IN_FEATURES:
- res2
- res3
- res4
- res5
NORM: ''
OUT_CHANNELS: 256
ISTR:
ALPHA: 0.25
CLASS_WEIGHT: 2.0
DEEP_SUPERVISION: true
DIM_DYNAMIC: 64
DIM_FEEDFORWARD: 2048
DROPOUT: 0.0
FEAT_WEIGHT: 1.0
GAMMA: 2.0
GIOU_WEIGHT: 2.0
HIDDEN_DIM: 256
IOU_LABELS:
- 0
- 1
IOU_THRESHOLDS:
- 0.5
L1_WEIGHT: 5.0
MASK_ENCODING_METHOD: AE
MASK_FEAT_DIM: 256
MASK_SIZE: 112
MASK_WEIGHT: 5.0
NHEADS: 8
NO_OBJECT_WEIGHT: 0.1
NUM_CLASSES: 5
NUM_CLS: 3
NUM_DYNAMIC: 2
NUM_HEADS: 6
NUM_MASK: 3
NUM_PROPOSALS: 300
NUM_REG: 3
PATH_COMPONENTS: /content/drive/MyDrive/imenselmi/ISTR_TRAIN/ISTR/projects/AE/checkpoints/AE_112_256.t7
PRIOR_PROB: 0.01
KEYPOINT_ON: false
LOAD_PROPOSALS: false
MASK_ON: true
META_ARCHITECTURE: ISTR
PANOPTIC_FPN:
COMBINE:
ENABLED: true
INSTANCES_CONFIDENCE_THRESH: 0.5
OVERLAP_THRESH: 0.5
STUFF_AREA_LIMIT: 4096
INSTANCE_LOSS_WEIGHT: 1.0
PIXEL_MEAN:
- 123.675
- 116.28
- 103.53
PIXEL_STD:
- 58.395
- 57.12
- 57.375
PROPOSAL_GENERATOR:
MIN_SIZE: 0
NAME: RPN
RESNETS:
DEFORM_MODULATED: false
DEFORM_NUM_GROUPS: 1
DEFORM_ON_PER_STAGE:
- false
- false
- false
- false
DEPTH: 50
NORM: FrozenBN
NUM_GROUPS: 1
OUT_FEATURES:
- res2
- res3
- res4
- res5
RES2_OUT_CHANNELS: 256
RES5_DILATION: 1
STEM_OUT_CHANNELS: 64
STRIDE_IN_1X1: false
WIDTH_PER_GROUP: 64
RETINANET:
BBOX_REG_LOSS_TYPE: smooth_l1
BBOX_REG_WEIGHTS: &id001
- 1.0
- 1.0
- 1.0
- 1.0
FOCAL_LOSS_ALPHA: 0.25
FOCAL_LOSS_GAMMA: 2.0
IN_FEATURES:
- p3
- p4
- p5
- p6
- p7
IOU_LABELS:
- 0
- -1
- 1
IOU_THRESHOLDS:
- 0.4
- 0.5
NMS_THRESH_TEST: 0.5
NORM: ''
NUM_CLASSES: 80
NUM_CONVS: 4
PRIOR_PROB: 0.01
SCORE_THRESH_TEST: 0.05
SMOOTH_L1_LOSS_BETA: 0.1
TOPK_CANDIDATES_TEST: 1000
ROI_BOX_CASCADE_HEAD:
BBOX_REG_WEIGHTS:
-
-
-
- 30.0
- 30.0
- 15.0
- 15.0
IOUS:
- 0.5
- 0.6
- 0.7
ROI_BOX_HEAD:
BBOX_REG_LOSS_TYPE: smooth_l1
BBOX_REG_LOSS_WEIGHT: 1.0
BBOX_REG_WEIGHTS:
- 10.0
- 10.0
- 5.0
- 5.0
CLS_AGNOSTIC_BBOX_REG: false
CONV_DIM: 256
FC_DIM: 1024
NAME: ''
NORM: ''
NUM_CONV: 0
NUM_FC: 0
POOLER_RESOLUTION: 7
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: ROIAlignV2
SMOOTH_L1_BETA: 0.0
TRAIN_ON_PRED_BOXES: false
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 512
IN_FEATURES:
- p2
- p3
- p4
- p5
IOU_LABELS:
- 0
- 1
IOU_THRESHOLDS:
- 0.5
NAME: Res5ROIHeads
NMS_THRESH_TEST: 0.5
NUM_CLASSES: 80
POSITIVE_FRACTION: 0.25
PROPOSAL_APPEND_GT: true
SCORE_THRESH_TEST: 0.05
ROI_KEYPOINT_HEAD:
CONV_DIMS:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
LOSS_WEIGHT: 1.0
MIN_KEYPOINTS_PER_IMAGE: 1
NAME: KRCNNConvDeconvUpsampleHead
NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
NUM_KEYPOINTS: 17
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 0
POOLER_TYPE: ROIAlignV2
ROI_MASK_HEAD:
CLS_AGNOSTIC_MASK: false
CONV_DIM: 256
NAME: MaskRCNNConvUpsampleHead
NORM: ''
NUM_CONV: 0
POOLER_RESOLUTION: 14
POOLER_SAMPLING_RATIO: 0
POOLER_TYPE: ROIAlignV2
RPN:
BATCH_SIZE_PER_IMAGE: 256
BBOX_REG_LOSS_TYPE: smooth_l1
BBOX_REG_LOSS_WEIGHT: 1.0
BBOX_REG_WEIGHTS: *id001
BOUNDARY_THRESH: -1
CONV_DIMS:
- -1
HEAD_NAME: StandardRPNHead
IN_FEATURES:
- res4
IOU_LABELS:
- 0
- -1
- 1
IOU_THRESHOLDS:
- 0.3
- 0.7
LOSS_WEIGHT: 1.0
NMS_THRESH: 0.7
POSITIVE_FRACTION: 0.5
POST_NMS_TOPK_TEST: 1000
POST_NMS_TOPK_TRAIN: 2000
PRE_NMS_TOPK_TEST: 6000
PRE_NMS_TOPK_TRAIN: 12000
SMOOTH_L1_BETA: 0.0
SEM_SEG_HEAD:
COMMON_STRIDE: 4
CONVS_DIM: 128
IGNORE_VALUE: 255
IN_FEATURES:
- p2
- p3
- p4
- p5
LOSS_WEIGHT: 1.0
NAME: SemSegFPNHead
NORM: GN
NUM_CLASSES: 54
SWINT:
APE: false
DEPTHS:
- 2
- 2
- 6
- 2
DROP_PATH_RATE: 0.2
EMBED_DIM: 96
MLP_RATIO: 4
NUM_HEADS:
- 3
- 6
- 12
- 24
OUT_FEATURES:
- stage2
- stage3
- stage4
- stage5
WINDOW_SIZE: 7
WEIGHTS: detectron2://ImageNetPretrained/torchvision/R-50.pkl
OUTPUT_DIR: ./output
SEED: 2333333
SOLVER:
AMP:
ENABLED: false
BACKBONE_MULTIPLIER: 1.0
BASE_LR: 2.5e-05
BIAS_LR_FACTOR: 1.0
CHECKPOINT_PERIOD: 5000
CLIP_GRADIENTS:
CLIP_TYPE: full_model
CLIP_VALUE: 1.0
ENABLED: true
NORM_TYPE: 2.0
GAMMA: 0.1
IMS_PER_BATCH: 16
LR_SCHEDULER_NAME: WarmupMultiStepLR
MAX_ITER: 270000
MOMENTUM: 0.9
NESTEROV: false
OPTIMIZER: ADAMW
REFERENCE_WORLD_SIZE: 0
STEPS:
- 210000
- 250000
WARMUP_FACTOR: 0.01
WARMUP_ITERS: 1000
WARMUP_METHOD: linear
WEIGHT_DECAY: 0.0001
WEIGHT_DECAY_BIAS: null
WEIGHT_DECAY_NORM: 0.0
TEST:
AUG:
ENABLED: false
FLIP: true
MAX_SIZE: 4000
MIN_SIZES:
- 400
- 500
- 600
- 700
- 800
- 900
- 1000
- 1100
- 1200
DETECTIONS_PER_IMAGE: 100
EVAL_PERIOD: 7330
EXPECTED_RESULTS: []
KEYPOINT_OKS_SIGMAS: []
PRECISE_BN:
ENABLED: false
NUM_ITER: 200
VERSION: 2
VIS_PERIOD: 0
[08/25 11:15:41 detectron2]: Full config saved to ./output/config.yaml
[08/25 11:15:47 d2.engine.defaults]: Model:
ISTR(
(backbone): FPN(
(fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(top_block): LastLevelMaxPool()
(bottom_up): ResNet(
(stem): BasicStem(
(conv1): Conv2d(
3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
)
)
(res2): Sequential(
(0): BottleneckBlock(
(shortcut): Conv2d(
64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv1): Conv2d(
64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
)
(conv2): Conv2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
)
(conv3): Conv2d(
64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
)
(1): BottleneckBlock(
(conv1): Conv2d(
256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
)
(conv2): Conv2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
)
(conv3): Conv2d(
64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
)
(2): BottleneckBlock(
(conv1): Conv2d(
256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
)
(conv2): Conv2d(
64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
)
(conv3): Conv2d(
64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
)
)
(res3): Sequential(
(0): BottleneckBlock(
(shortcut): Conv2d(
256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv1): Conv2d(
256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv2): Conv2d(
128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv3): Conv2d(
128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
)
(1): BottleneckBlock(
(conv1): Conv2d(
512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv2): Conv2d(
128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv3): Conv2d(
128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
)
(2): BottleneckBlock(
(conv1): Conv2d(
512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv2): Conv2d(
128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv3): Conv2d(
128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
)
(3): BottleneckBlock(
(conv1): Conv2d(
512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv2): Conv2d(
128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
)
(conv3): Conv2d(
128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
)
)
(res4): Sequential(
(0): BottleneckBlock(
(shortcut): Conv2d(
512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False
(norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
)
(conv1): Conv2d(
512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv2): Conv2d(
256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv3): Conv2d(
256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
)
)
(1): BottleneckBlock(
(conv1): Conv2d(
1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv2): Conv2d(
256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv3): Conv2d(
256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
)
)
(2): BottleneckBlock(
(conv1): Conv2d(
1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv2): Conv2d(
256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv3): Conv2d(
256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
)
)
(3): BottleneckBlock(
(conv1): Conv2d(
1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv2): Conv2d(
256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv3): Conv2d(
256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
)
)
(4): BottleneckBlock(
(conv1): Conv2d(
1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv2): Conv2d(
256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv3): Conv2d(
256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
)
)
(5): BottleneckBlock(
(conv1): Conv2d(
1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv2): Conv2d(
256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
)
(conv3): Conv2d(
256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
)
)
)
(res5): Sequential(
(0): BottleneckBlock(
(shortcut): Conv2d(
1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
(conv1): Conv2d(
1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv2): Conv2d(
512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv3): Conv2d(
512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
)
(1): BottleneckBlock(
(conv1): Conv2d(
2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv2): Conv2d(
512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv3): Conv2d(
512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
)
(2): BottleneckBlock(
(conv1): Conv2d(
2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv2): Conv2d(
512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
)
(conv3): Conv2d(
512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
(norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
)
)
)
)
)
(pos_embeddings): Embedding(300, 256)
(init_proposal_boxes): Embedding(300, 4)
(IFE): ImgFeatExtractor()
(mask_E): Encoder(
(encoder): Sequential(
(0): Conv2d(1, 16, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=True)
(3): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=True)
(6): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): ELU(alpha=True)
(9): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): ELU(alpha=True)
(12): Conv2d(128, 256, kernel_size=(7, 7), stride=(1, 1))
(13): View()
)
)
(mask_D): Decoder(
(decoder): Sequential(
(0): View()
(1): ConvTranspose2d(256, 128, kernel_size=(7, 7), stride=(1, 1))
(2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ELU(alpha=1.0, inplace=True)
(4): up_conv(
(up): Sequential(
(0): Upsample(scale_factor=2.0, mode=bilinear)
(1): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ELU(alpha=1.0, inplace=True)
)
)
(5): up_conv(
(up): Sequential(
(0): Upsample(scale_factor=2.0, mode=bilinear)
(1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ELU(alpha=1.0, inplace=True)
)
)
(6): up_conv(
(up): Sequential(
(0): Upsample(scale_factor=2.0, mode=bilinear)
(1): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ELU(alpha=1.0, inplace=True)
)
)
(7): up_conv(
(up): Sequential(
(0): Upsample(scale_factor=2.0, mode=bilinear)
(1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ELU(alpha=1.0, inplace=True)
)
)
(8): Conv2d(16, 1, kernel_size=(1, 1), stride=(1, 1))
(9): Sigmoid()
(10): View()
)
)
(head): DynamicHead(
(box_pooler): ROIPooler(
(level_poolers): ModuleList(
(0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=2, aligned=True)
(1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=2, aligned=True)
(2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=2, aligned=True)
(3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=2, aligned=True)
)
)
(mask_pooler): ROIPooler(
(level_poolers): ModuleList(
(0): ROIAlign(output_size=(28, 28), spatial_scale=0.25, sampling_ratio=2, aligned=True)
(1): ROIAlign(output_size=(28, 28), spatial_scale=0.125, sampling_ratio=2, aligned=True)
(2): ROIAlign(output_size=(28, 28), spatial_scale=0.0625, sampling_ratio=2, aligned=True)
(3): ROIAlign(output_size=(28, 28), spatial_scale=0.03125, sampling_ratio=2, aligned=True)
)
)
(head_series): ModuleList(
(0): RCNNHead(
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
)
(inst_interact): DynamicConv(
(dynamic_layer): Linear(in_features=256, out_features=32768, bias=True)
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(activation): ELU(alpha=1.0, inplace=True)
(out_layer): Linear(in_features=12544, out_features=256, bias=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(linear1): Linear(in_features=256, out_features=2048, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
(linear2): Linear(in_features=2048, out_features=256, bias=True)
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(dropout1): Dropout(p=0.0, inplace=False)
(dropout2): Dropout(p=0.0, inplace=False)
(dropout3): Dropout(p=0.0, inplace=False)
(activation): ELU(alpha=1.0, inplace=True)
(cls_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(reg_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(mask_module): Sequential(
(0): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=True)
(3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=True)
(6): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1))
)
(ret_roi_layer_1): conv_block(
(conv): Sequential(
(0): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(ret_roi_layer_2): conv_block(
(conv): Sequential(
(0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(class_logits): Linear(in_features=256, out_features=5, bias=True)
(bboxes_delta): Linear(in_features=256, out_features=4, bias=True)
)
(1): RCNNHead(
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
)
(inst_interact): DynamicConv(
(dynamic_layer): Linear(in_features=256, out_features=32768, bias=True)
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(activation): ELU(alpha=1.0, inplace=True)
(out_layer): Linear(in_features=12544, out_features=256, bias=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(linear1): Linear(in_features=256, out_features=2048, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
(linear2): Linear(in_features=2048, out_features=256, bias=True)
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(dropout1): Dropout(p=0.0, inplace=False)
(dropout2): Dropout(p=0.0, inplace=False)
(dropout3): Dropout(p=0.0, inplace=False)
(activation): ELU(alpha=1.0, inplace=True)
(cls_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(reg_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(mask_module): Sequential(
(0): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=True)
(3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=True)
(6): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1))
)
(ret_roi_layer_1): conv_block(
(conv): Sequential(
(0): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(ret_roi_layer_2): conv_block(
(conv): Sequential(
(0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(class_logits): Linear(in_features=256, out_features=5, bias=True)
(bboxes_delta): Linear(in_features=256, out_features=4, bias=True)
)
(2): RCNNHead(
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
)
(inst_interact): DynamicConv(
(dynamic_layer): Linear(in_features=256, out_features=32768, bias=True)
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(activation): ELU(alpha=1.0, inplace=True)
(out_layer): Linear(in_features=12544, out_features=256, bias=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(linear1): Linear(in_features=256, out_features=2048, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
(linear2): Linear(in_features=2048, out_features=256, bias=True)
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(dropout1): Dropout(p=0.0, inplace=False)
(dropout2): Dropout(p=0.0, inplace=False)
(dropout3): Dropout(p=0.0, inplace=False)
(activation): ELU(alpha=1.0, inplace=True)
(cls_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(reg_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(mask_module): Sequential(
(0): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=True)
(3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=True)
(6): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1))
)
(ret_roi_layer_1): conv_block(
(conv): Sequential(
(0): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(ret_roi_layer_2): conv_block(
(conv): Sequential(
(0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(class_logits): Linear(in_features=256, out_features=5, bias=True)
(bboxes_delta): Linear(in_features=256, out_features=4, bias=True)
)
(3): RCNNHead(
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
)
(inst_interact): DynamicConv(
(dynamic_layer): Linear(in_features=256, out_features=32768, bias=True)
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(activation): ELU(alpha=1.0, inplace=True)
(out_layer): Linear(in_features=12544, out_features=256, bias=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(linear1): Linear(in_features=256, out_features=2048, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
(linear2): Linear(in_features=2048, out_features=256, bias=True)
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(dropout1): Dropout(p=0.0, inplace=False)
(dropout2): Dropout(p=0.0, inplace=False)
(dropout3): Dropout(p=0.0, inplace=False)
(activation): ELU(alpha=1.0, inplace=True)
(cls_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(reg_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(mask_module): Sequential(
(0): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=True)
(3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=True)
(6): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1))
)
(ret_roi_layer_1): conv_block(
(conv): Sequential(
(0): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(ret_roi_layer_2): conv_block(
(conv): Sequential(
(0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(class_logits): Linear(in_features=256, out_features=5, bias=True)
(bboxes_delta): Linear(in_features=256, out_features=4, bias=True)
)
(4): RCNNHead(
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
)
(inst_interact): DynamicConv(
(dynamic_layer): Linear(in_features=256, out_features=32768, bias=True)
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(activation): ELU(alpha=1.0, inplace=True)
(out_layer): Linear(in_features=12544, out_features=256, bias=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(linear1): Linear(in_features=256, out_features=2048, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
(linear2): Linear(in_features=2048, out_features=256, bias=True)
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(dropout1): Dropout(p=0.0, inplace=False)
(dropout2): Dropout(p=0.0, inplace=False)
(dropout3): Dropout(p=0.0, inplace=False)
(activation): ELU(alpha=1.0, inplace=True)
(cls_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(reg_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(mask_module): Sequential(
(0): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=True)
(3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=True)
(6): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1))
)
(ret_roi_layer_1): conv_block(
(conv): Sequential(
(0): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(ret_roi_layer_2): conv_block(
(conv): Sequential(
(0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(class_logits): Linear(in_features=256, out_features=5, bias=True)
(bboxes_delta): Linear(in_features=256, out_features=4, bias=True)
)
(5): RCNNHead(
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
)
(inst_interact): DynamicConv(
(dynamic_layer): Linear(in_features=256, out_features=32768, bias=True)
(norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(activation): ELU(alpha=1.0, inplace=True)
(out_layer): Linear(in_features=12544, out_features=256, bias=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
)
(linear1): Linear(in_features=256, out_features=2048, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
(linear2): Linear(in_features=2048, out_features=256, bias=True)
(norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(norm3): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(dropout1): Dropout(p=0.0, inplace=False)
(dropout2): Dropout(p=0.0, inplace=False)
(dropout3): Dropout(p=0.0, inplace=False)
(activation): ELU(alpha=1.0, inplace=True)
(cls_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(reg_module): ModuleList(
(0): Linear(in_features=256, out_features=256, bias=False)
(1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Linear(in_features=256, out_features=256, bias=False)
(4): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(5): ELU(alpha=1.0, inplace=True)
(6): Linear(in_features=256, out_features=256, bias=False)
(7): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(8): ELU(alpha=1.0, inplace=True)
)
(mask_module): Sequential(
(0): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=True)
(3): Conv2d(256, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=True)
(6): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1))
)
(ret_roi_layer_1): conv_block(
(conv): Sequential(
(0): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(ret_roi_layer_2): conv_block(
(conv): Sequential(
(0): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ELU(alpha=1.0, inplace=True)
(3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ELU(alpha=1.0, inplace=True)
)
)
(class_logits): Linear(in_features=256, out_features=5, bias=True)
(bboxes_delta): Linear(in_features=256, out_features=4, bias=True)
)
)
)
(criterion): SetCriterion(
(matcher): HungarianMatcher()
)
)
[08/25 11:15:53 d2.data.datasets.coco]: Loading /content/drive/MyDrive/imenselmi/ISTR_TRAIN/data/result/train.json takes 6.61 seconds.
[08/25 11:15:54 d2.data.datasets.coco]: Loaded 43480 images in COCO format from /content/drive/MyDrive/imenselmi/ISTR_TRAIN/data/result/train.json
[08/25 11:15:57 d2.data.build]: Removed 0 images with no usable annotations. 43480 images left.
[08/25 11:15:59 d2.data.build]: Distribution of instances among all 5 categories:
| category | #instances | category | #instances | category | #instances |
|:-------------:|:-------------|:-------------:|:-------------|:-------------:|:-------------|
| short_sleev.. | 18359 | long_sleeve.. | 14566 | long_sleeve.. | 10492 |
| shorts | 12123 | trousers | 18227 | | |
| total | 73767 | | | | |
pos_embeddings.weight
WARNING [08/25 11:16:04 fvcore.common.checkpoint]: The checkpoint state_dict contains keys that are not used by the model:
stem.fc.{bias, weight}
[08/25 11:16:04 d2.engine.train_loop]: Starting training from iteration 0
/usr/local/lib/python3.7/dist-packages/fvcore/transforms/transform.py:724: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the geoms
property to access the constituent parts of a multi-part geometry.
for poly in cropped:
/usr/local/lib/python3.7/dist-packages/torch/_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
return torch.floor_divide(self, other)
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)
return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
ERROR [08/25 11:16:05 d2.engine.train_loop]: Exception during training:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/train_loop.py", line 149, in train
self.run_step()
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/defaults.py", line 494, in run_step
self._trainer.run_step()
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/train_loop.py", line 273, in run_step
loss_dict = self.model(data)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/.shortcut-targets-by-id/190HFmYfsGdKfNWeUiqnpTgh7X3m3GFmF/ISTR_TRAIN/ISTR/projects/ISTR/istr/inseg.py", line 162, in forward
src = self.backbone(images.tensor)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/modeling/backbone/fpn.py", line 126, in forward
bottom_up_features = self.bottom_up(x)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/modeling/backbone/resnet.py", line 449, in forward
x = stage(x)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/modeling/backbone/resnet.py", line 201, in forward
out = self.conv3(out)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/layers/wrappers.py", line 110, in forward
x = self.norm(x)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/layers/batch_norm.py", line 53, in forward
return x * scale.to(out_dtype) + bias.to(out_dtype)
RuntimeError: CUDA out of memory. Tried to allocate 672.00 MiB (GPU 0; 15.78 GiB total capacity; 13.42 GiB already allocated; 50.75 MiB free; 14.41 GiB reserved in total by PyTorch)
[08/25 11:16:05 d2.engine.hooks]: Total training time: 0:00:01 (0:00:00 on hooks)
[08/25 11:16:05 d2.utils.events]: iter: 0 lr: N/A max_mem: 14075M
Traceback (most recent call last):
File "projects/ISTR/train_net.py", line 136, in
args=(args,),
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/launch.py", line 82, in launch
main_func(*args)
File "projects/ISTR/train_net.py", line 124, in main
return trainer.train()
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/defaults.py", line 484, in train
super().train(self.start_iter, self.max_iter)
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/train_loop.py", line 149, in train
self.run_step()
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/defaults.py", line 494, in run_step
self._trainer.run_step()
File "/usr/local/lib/python3.7/dist-packages/detectron2/engine/train_loop.py", line 273, in run_step
loss_dict = self.model(data)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/.shortcut-targets-by-id/190HFmYfsGdKfNWeUiqnpTgh7X3m3GFmF/ISTR_TRAIN/ISTR/projects/ISTR/istr/inseg.py", line 162, in forward
src = self.backbone(images.tensor)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/modeling/backbone/fpn.py", line 126, in forward
bottom_up_features = self.bottom_up(x)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/modeling/backbone/resnet.py", line 449, in forward
x = stage(x)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/modeling/backbone/resnet.py", line 201, in forward
out = self.conv3(out)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/layers/wrappers.py", line 110, in forward
x = self.norm(x)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/detectron2/layers/batch_norm.py", line 53, in forward
return x * scale.to(out_dtype) + bias.to(out_dtype)
RuntimeError: CUDA out of memory. Tried to allocate 672.00 MiB (GPU 0; 15.78 GiB total capacity; 13.42 GiB already allocated; 50.75 MiB free; 14.41 GiB reserved in total by PyTorch)