"""Helper to facilitate the use of inputs and outputs of the models."""
# =============================================================================
# Copyright 2021 Henrique Morimitsu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
import logging
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
import torch
from ptlflow.data.flow_transforms import ToTensor
from ptlflow.models.base_model.base_model import BaseModel
from ptlflow.utils.utils import InputScaler
[docs]
class IOAdapter(object):
    """Handle the inputs and outputs of optical flow models.

    The adapter stores the scaling and device configuration given at construction time. It offers ``unscale`` to
    revert the scaling applied to tensors and ``_to_cuda`` to move tensors to the GPU (optionally as half precision).
    """

    def __init__(
        self,
        model: "BaseModel",
        input_size: Tuple[int, int],
        target_size: Optional[Tuple[int, int]] = None,
        target_scale_factor: Optional[float] = None,
        interpolation_mode: str = "bilinear",
        interpolation_align_corners: bool = False,
        cuda: bool = False,
        fp16: bool = False,
    ) -> None:
        """Initialize IOAdapter.

        Parameters
        ----------
        model : BaseModel
            An instance of optical flow model that will be used for estimation. Its ``output_stride`` attribute is
            read and stored.
        input_size : Tuple[int, int]
            The shape of the original inputs, must be a tuple with at least two elements. It is assumed that the last
            two elements are (height, width).
        target_size : Optional[Tuple[int, int]], optional
            If provided, the inputs will be resized to target_size. target_size is defined as a tuple (height, width).
            A size whose smallest element is not positive is treated as "not provided".
        target_scale_factor : Optional[float], optional
            The multiplier that will be applied to the original shape to scale the input. This value is only used if
            no valid target_size is given. A non-positive value is treated as "not provided".
        interpolation_mode : str, default 'bilinear'
            How to perform the interpolation. It must be a value accepted by the 'mode' argument from
            torch.nn.functional.interpolate function.
        interpolation_align_corners : bool, default False
            Whether the interpolation keep the corners aligned. As defined in torch.nn.functional.interpolate.
        cuda : bool, default False
            If True, the input tensors are transferred to GPU (if a GPU is available).
        fp16 : bool, default False
            If True, tensors whose key contains "flow" or "image" are converted to half precision. This is only done
            when the tensors are actually transferred to the GPU.
        """
        self.output_stride = model.output_stride
        self.target_size = target_size
        self.target_scale_factor = target_scale_factor
        self.interpolation_mode = interpolation_mode
        self.interpolation_align_corners = interpolation_align_corners
        self.cuda = cuda
        self.fp16 = fp16
        self.transform = ToTensor()

        has_valid_size = target_size is not None and min(target_size) > 0
        has_valid_scale = (
            target_scale_factor is not None and target_scale_factor > 0
        )

        # No scaler is created (and unscale() becomes a no-op) unless at least one valid scaling option is given.
        self.scaler = None
        if has_valid_size or has_valid_scale:
            self.scaler = InputScaler(
                orig_shape=input_size,
                # An invalid size (e.g. (0, 0)) must not be forwarded, otherwise it would take the place of a
                # valid scale factor.
                size=target_size if has_valid_size else None,
                scale_factor=target_scale_factor,
                interpolation_mode=interpolation_mode,
                interpolation_align_corners=interpolation_align_corners,
            )

    def unscale(
        self, outputs: Dict[str, Any], image_only: bool = False
    ) -> Dict[str, Any]:
        """Remove scaling to restore the original shapes.

        The inputs and outputs handled by this adapter may have been scaled. This function can be used to revert
        that operation.

        Typically, this function should be used on both the inputs and outputs of the model after the model generated
        the predictions.

        Parameters
        ----------
        outputs : Dict[str, Any]
            It can be either the inputs or the outputs of optical flow models. All tensors will have the extra scaling
            removed. Values that are not tensors are left untouched.
        image_only : bool, default False
            If True, only removes scaling from the entry with key "images".

        Returns
        -------
        Dict[str, Any]
            The same dict instance received as input (it is modified in place), with the scaling removed from its
            tensors.
        """
        if self.scaler is None:
            # Nothing was scaled, so there is nothing to revert.
            return outputs

        # Only values of existing keys are replaced, so the dict can be safely updated while iterating over it.
        for key, value in outputs.items():
            if image_only and key != "images":
                continue
            if isinstance(value, torch.Tensor):
                outputs[key] = self.scaler.unfill(
                    value, is_flow=key.startswith("flow")
                )
        return outputs

    def _to_cuda(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Transfer the tensors to the GPU, if requested and available.

        Parameters
        ----------
        inputs : Dict[str, Any]
            The elements to be transferred. Values that are not tensors are kept as they are.

        Returns
        -------
        Dict[str, Any]
            If self.cuda is False or no GPU is available, the received dict itself. Otherwise, a new dict in which
            all tensors are on the GPU. If self.fp16 is True, tensors whose key contains "flow" or "image" are also
            converted to half precision.
        """
        if not self.cuda:
            return inputs

        if not torch.cuda.is_available():
            logging.warning(
                "IOAdapter was asked to use cuda, but torch.cuda.is_available() == False. Tensors will remain on CPU."
            )
            return inputs

        on_gpu = {}
        for key, value in inputs.items():
            if isinstance(value, torch.Tensor):
                value = value.cuda()
                # Half precision is restricted to image and flow tensors.
                if self.fp16 and ("flow" in key or "image" in key):
                    value = value.half()
            on_gpu[key] = value
        return on_gpu