init commit

Author: Yao Wang
Date: 2022-05-09 14:32:31 +02:00
commit d5d633b6c7
517 changed files with 18824 additions and 0 deletions

RecallNet/environment.yaml Normal file

@@ -0,0 +1,160 @@
name: tf-cuda9
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _tflow_select=2.1.0=gpu
- absl-py=0.11.0=py37h06a4308_0
- argon2-cffi=20.1.0=py37h8f50634_2
- astor=0.8.1=py37_0
- async_generator=1.10=py_0
- attrs=20.2.0=pyh9f0ad1d_0
- backports=1.0=py_2
- backports.functools_lru_cache=1.6.1=py_0
- blas=1.0=mkl
- bleach=3.2.1=pyh9f0ad1d_0
- c-ares=1.16.1=h7b6447c_0
- ca-certificates=2021.4.13=h06a4308_1
- certifi=2020.12.5=py37h06a4308_0
- cffi=1.14.3=py37he30daa8_0
- cudatoolkit=9.0=h13b8566_0
- cudnn=7.3.1=cuda9.0_0
- cupti=9.0.176=0
- decorator=4.4.2=py_0
- defusedxml=0.6.0=py_0
- entrypoints=0.3=py37hc8dfbb8_1002
- gast=0.4.0=py_0
- google-pasta=0.2.0=py_0
- grpcio=1.31.0=py37hf8bcb03_0
- h5py=2.10.0=py37hd6299e0_1
- hdf5=1.10.6=hb1b8bf9_0
- importlib-metadata=2.0.0=py_1
- importlib_metadata=2.0.0=1
- intel-openmp=2020.2=254
- ipykernel=5.3.4=py37hc6149b9_1
- ipython=5.8.0=py37_1
- ipython_genutils=0.2.0=py_1
- ipywidgets=7.5.1=pyh9f0ad1d_1
- jinja2=2.11.2=pyh9f0ad1d_0
- jsonschema=3.2.0=py_2
- jupyter_client=6.1.7=py_0
- jupyter_core=4.6.3=py37hc8dfbb8_2
- jupyterlab_pygments=0.1.2=pyh9f0ad1d_0
- keras=2.3.1=0
- keras-applications=1.0.8=py_1
- keras-base=2.3.1=py37_0
- keras-preprocessing=1.1.0=py_1
- ld_impl_linux-64=2.33.1=h53a641e_7
- libedit=3.1.20191231=h14c3975_1
- libffi=3.3=he6710b0_2
- libgcc-ng=9.1.0=hdf63c60_0
- libgfortran-ng=7.3.0=hdf63c60_0
- libprotobuf=3.13.0.1=hd408876_0
- libsodium=1.0.18=h516909a_1
- libstdcxx-ng=9.1.0=hdf63c60_0
- markdown=3.3.2=py37_0
- markupsafe=1.1.1=py37hb5d75c8_2
- mistune=0.8.4=py37h8f50634_1002
- mkl=2020.2=256
- mkl-service=2.3.0=py37he904b0f_0
- mkl_fft=1.2.0=py37h23d657b_0
- mkl_random=1.1.1=py37h0573a6f_0
- nbclient=0.5.1=py_0
- nbconvert=6.0.7=py37hc8dfbb8_2
- nbformat=5.0.8=py_0
- ncurses=6.2=he6710b0_1
- nest-asyncio=1.4.1=py_0
- notebook=6.1.4=py37hc8dfbb8_1
- numpy=1.19.2=py37h54aff64_0
- numpy-base=1.19.2=py37hfa32c7d_0
- openssl=1.1.1k=h27cfd23_0
- packaging=20.4=pyh9f0ad1d_0
- pandoc=2.11.0.4=hd18ef5c_0
- pandocfilters=1.4.2=py_1
- pexpect=4.8.0=pyh9f0ad1d_2
- pickleshare=0.7.5=py_1003
- pip=20.2.4=py37_0
- prometheus_client=0.8.0=pyh9f0ad1d_0
- prompt_toolkit=1.0.15=py_1
- protobuf=3.13.0.1=py37he6710b0_1
- ptyprocess=0.6.0=py_1001
- pycparser=2.20=pyh9f0ad1d_2
- pygments=2.7.2=py_0
- pyparsing=2.4.7=pyh9f0ad1d_0
- pyrsistent=0.17.3=py37h8f50634_1
- python=3.7.9=h7579374_0
- python-dateutil=2.8.1=py_0
- python_abi=3.7=1_cp37m
- pyyaml=5.3.1=py37h7b6447c_1
- pyzmq=19.0.2=py37hac76be4_2
- readline=8.0=h7b6447c_0
- send2trash=1.5.0=py_0
- setuptools=50.3.0=py37hb0f4dca_1
- simplegeneric=0.8.1=py_1
- six=1.15.0=py_0
- sqlite=3.33.0=h62c20be_0
- tensorboard=1.14.0=py37hf484d3e_0
- tensorflow=1.14.0=gpu_py37hae64822_0
- tensorflow-base=1.14.0=gpu_py37h8f37b9b_0
- tensorflow-estimator=1.14.0=py_0
- tensorflow-gpu=1.14.0=h0d30ee6_0
- termcolor=1.1.0=py37_1
- terminado=0.9.1=py37hc8dfbb8_1
- testpath=0.4.4=py_0
- tk=8.6.10=hbc83047_0
- tornado=6.0.4=py37h8f50634_2
- traitlets=5.0.5=py_0
- wcwidth=0.2.5=pyh9f0ad1d_2
- webencodings=0.5.1=py_1
- werkzeug=1.0.1=py_0
- wheel=0.35.1=py_0
- widgetsnbextension=3.5.1=py37hc8dfbb8_4
- wrapt=1.12.1=py37h7b6447c_1
- xz=5.2.5=h7b6447c_0
- yaml=0.2.5=h7b6447c_0
- zeromq=4.3.3=he1b5a44_2
- zipp=3.4.0=pyhd3eb1b0_0
- zlib=1.2.11=h7b6447c_3
- pip:
- adjusttext==0.7.3
- alignment==0.0.1
- chardet==4.0.0
- cmasher==1.6.2
- colorspacious==1.1.2
- cycler==0.10.0
- e13tools==0.9.6
- editdistance==0.5.3
- fastdtw==0.3.4
- idna==2.10
- imageio==2.9.0
- joblib==1.0.0
- kiwisolver==1.2.0
- mat4py==0.5.0
- matplotlib==3.4.3
- natsort==7.1.1
- networkx==2.5
- opencv-python==4.4.0.44
- pandas==1.2.1
- patsy==0.5.1
- pillow==8.0.1
- pytz==2021.1
- pywavelets==1.1.1
- requests==2.25.1
- researchpy==0.3.2
- salicon==1.0
- scikit-image==0.18.1
- scikit-learn==0.24.1
- scipy==1.2.0
- seaborn==0.11.1
- simplejson==3.17.5
- sklearn==0.0
- spotlight==2.3.1
- statsmodels==0.12.2
- threadpoolctl==2.1.0
- tifffile==2021.3.17
- tqdm==4.51.0
- urllib3==1.26.4
- wget==3.2
prefix: /netpool/homes/wangyo/.conda/envs/tf-cuda9
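# To recreate this environment elsewhere (assuming conda is installed), run:
#   conda env create -f RecallNet/environment.yaml
# The prefix line above records the original author's install path; it can be
# removed, or overridden with -p <path> when creating the environment.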

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long


@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
"/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
" np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
]
}
],
"source": [
"from xception_custom import Xception_wrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from keras.layers import Input"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[[155 225 83]\n",
" [174 33 86]\n",
" [ 24 223 10]\n",
" ...\n",
" [147 233 79]\n",
" [232 187 173]\n",
" [ 69 126 85]]\n",
"\n",
" [[166 203 47]\n",
" [111 65 37]\n",
" [210 182 244]\n",
" ...\n",
" [154 62 70]\n",
" [ 62 93 101]\n",
" [132 231 126]]\n",
"\n",
" [[ 30 110 125]\n",
" [242 45 71]\n",
" [150 10 217]\n",
" ...\n",
" [ 38 165 128]\n",
" [ 64 58 127]\n",
" [179 174 72]]\n",
"\n",
" ...\n",
"\n",
" [[159 2 99]\n",
" [201 220 158]\n",
" [170 172 13]\n",
" ...\n",
" [ 79 72 65]\n",
" [ 10 228 7]\n",
" [ 99 60 129]]\n",
"\n",
" [[187 249 6]\n",
" [ 57 166 83]\n",
" [187 243 66]\n",
" ...\n",
" [109 184 147]\n",
" [142 158 83]\n",
" [190 61 30]]\n",
"\n",
" [[146 238 74]\n",
" [156 20 43]\n",
" [ 55 217 43]\n",
" ...\n",
" [208 181 141]\n",
" [196 88 15]\n",
" [132 225 63]]]\n"
]
},
{
"ename": "TypeError",
"evalue": "Error converting shape to a TensorShape: only size-1 arrays can be converted to Python scalars.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mmake_shape\u001b[0;34m(v, arg_name)\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 146\u001b[0;31m \u001b[0mshape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_shape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_shape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 147\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py\u001b[0m in \u001b[0;36mas_shape\u001b[0;34m(shape)\u001b[0m\n\u001b[1;32m 1203\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1204\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mTensorShape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1205\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, dims)\u001b[0m\n\u001b[1;32m 773\u001b[0m \u001b[0;31m# Got a list of dimensions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 774\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dims\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mas_dimension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0md\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdims_iter\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 775\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 773\u001b[0m \u001b[0;31m# Got a list of dimensions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 774\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dims\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mas_dimension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0md\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdims_iter\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 775\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py\u001b[0m in \u001b[0;36mas_dimension\u001b[0;34m(value)\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mDimension\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 717\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/framework/tensor_shape.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m if (not isinstance(value, compat.bytes_or_text_types) and\n",
"\u001b[0;31mTypeError\u001b[0m: only size-1 arrays can be converted to Python scalars",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-8f06c869009e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mxception\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mXception_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minclude_top\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'imagenet'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_tensor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpooling\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'xception:'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mxception\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mtest_xception_shape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-8-8f06c869009e>\u001b[0m in \u001b[0;36mtest_xception_shape\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0minput_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m240\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m320\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0minp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mInput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mxception\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mXception_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minclude_top\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'imagenet'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_tensor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpooling\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'xception:'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mxception\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/keras/engine/input_layer.py\u001b[0m in \u001b[0;36mInput\u001b[0;34m(shape, batch_shape, name, dtype, sparse, tensor)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0msparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 178\u001b[0;31m input_tensor=tensor)\n\u001b[0m\u001b[1;32m 179\u001b[0m \u001b[0;31m# Return tensor including _keras_shape and _keras_history.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0;31m# Note that in this case train_output and test_output are the same pointer.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/keras/legacy/interfaces.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 89\u001b[0m warnings.warn('Update your `' + object_name + '` call to the ' +\n\u001b[1;32m 90\u001b[0m 'Keras 2 API: ' + signature, stacklevel=2)\n\u001b[0;32m---> 91\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 92\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_function\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/keras/engine/input_layer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, input_shape, batch_size, batch_input_shape, dtype, input_tensor, sparse, name)\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0msparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 87\u001b[0;31m name=self.name)\n\u001b[0m\u001b[1;32m 88\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_placeholder\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36mplaceholder\u001b[0;34m(shape, ndim, dtype, sparse, name)\u001b[0m\n\u001b[1;32m 734\u001b[0m \u001b[0mdtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloatx\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 735\u001b[0m x = tf_keras_backend.placeholder(\n\u001b[0;32m--> 736\u001b[0;31m shape=shape, ndim=ndim, dtype=dtype, sparse=sparse, name=name)\n\u001b[0m\u001b[1;32m 737\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 738\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mndim\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/keras/backend.py\u001b[0m in \u001b[0;36mplaceholder\u001b[0;34m(shape, ndim, dtype, sparse, name)\u001b[0m\n\u001b[1;32m 996\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marray_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparse_placeholder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 997\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 998\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marray_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplaceholder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 999\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1000\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36mplaceholder\u001b[0;34m(dtype, shape, name)\u001b[0m\n\u001b[1;32m 2141\u001b[0m \"eager execution.\")\n\u001b[1;32m 2142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2143\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgen_array_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplaceholder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2145\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py\u001b[0m in \u001b[0;36mplaceholder\u001b[0;34m(dtype, shape, name)\u001b[0m\n\u001b[1;32m 6258\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6259\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6260\u001b[0;31m \u001b[0mshape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_execute\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_shape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"shape\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6261\u001b[0m _, _, _op = _op_def_lib._apply_op_helper(\n\u001b[1;32m 6262\u001b[0m \"Placeholder\", dtype=dtype, shape=shape, name=name)\n",
"\u001b[0;32m/netpool/homes/wangyo/.conda/envs/tf-cuda9/lib/python3.7/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mmake_shape\u001b[0;34m(v, arg_name)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_shape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_shape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 148\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Error converting %s to a TensorShape: %s.\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0marg_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 149\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m raise ValueError(\"Error converting %s to a TensorShape: %s.\" % (arg_name,\n",
"\u001b[0;31mTypeError\u001b[0m: Error converting shape to a TensorShape: only size-1 arrays can be converted to Python scalars."
]
}
],
"source": [
"def test_xception_shape():\n",
" input_ = np.random.randint(0,256, (240,320,3))\n",
" print(input_)\n",
" inp = Input(input_)\n",
" xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)\n",
" print('xception:',xception.output.shape)\n",
"test_xception_shape()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
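
The TypeError recorded above comes from passing the image array itself to Input, which expects a shape tuple; a corrected test cell (illustrative) would be:

def test_xception_shape():
    input_ = np.random.randint(0, 256, (240, 320, 3))
    inp = Input(shape=input_.shape)  # pass the shape (240, 320, 3), not the array
    xception = Xception_wrapper(include_top=False, weights='imagenet',
                                input_tensor=inp, pooling=None)
    print('xception:', xception.output.shape)
test_xception_shape()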

Binary files not shown (9 files).


@@ -0,0 +1,717 @@
# -*- coding: utf-8 -*-
"""Convolutional-recurrent layers.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from keras import backend as K
from keras import activations
from keras import initializers
from keras import regularizers
from keras import constraints
#from keras.layers.recurrent import _generate_dropout_mask
#from keras.layers.recurrent import _standardize_args
import numpy as np
import warnings
from keras.engine.base_layer import InputSpec, Layer
from keras.utils import conv_utils
#from keras.legacy import interfaces
#from keras.legacy.layers import Recurrent, ConvRecurrent2D
from keras.layers.recurrent import RNN
from keras.utils.generic_utils import has_arg
from keras.utils.generic_utils import to_list
from keras.utils.generic_utils import transpose_shape
from tensorflow.python.keras.layers.convolutional_recurrent import ConvRNN2D
def _generate_dropout_mask(ones, rate, training=None, count=1):
    # Restored from keras.layers.recurrent (import commented out above); the copy here was truncated.
    def dropped_inputs():
        return K.dropout(ones, rate)
    if count > 1:
        return [K.in_train_phase(dropped_inputs, ones, training=training) for _ in range(count)]
    return K.in_train_phase(dropped_inputs, ones, training=training)
class AttentiveConvLSTM2DCell(Layer):
def __init__(self,
filters,
attentive_filters,
kernel_size,
attentive_kernel_size,
strides=(1, 1),
padding='valid',
data_format=None,
dilation_rate=(1, 1),
activation='tanh',
recurrent_activation='hard_sigmoid',
attentive_activation='tanh',
use_bias=True,
kernel_initializer='glorot_uniform',
recurrent_initializer='orthogonal',
attentive_initializer='zeros',
bias_initializer='zeros',
unit_forget_bias=True,
kernel_regularizer=None,
recurrent_regularizer=None,
attentive_regularizer=None,
bias_regularizer=None,
kernel_constraint=None,
recurrent_constraint=None,
attentive_constraint=None,
bias_constraint=None,
dropout=0.,
recurrent_dropout=0.,
attentive_dropout=0.,
**kwargs):
super(AttentiveConvLSTM2DCell, self).__init__(**kwargs)
self.filters = filters
self.attentive_filters = attentive_filters
self.kernel_size = conv_utils.normalize_tuple(kernel_size, 2, 'kernel_size')
self.attentive_kernel_size = conv_utils.normalize_tuple(attentive_kernel_size, 2, 'attentive_kernel_size')
self.strides = conv_utils.normalize_tuple(strides, 2, 'strides')
self.padding = conv_utils.normalize_padding(padding)
self.data_format = K.normalize_data_format(data_format)
self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, 2,
'dilation_rate')
self.activation = activations.get(activation)
self.recurrent_activation = activations.get(recurrent_activation)
self.attentive_activation = activations.get(attentive_activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.recurrent_initializer = initializers.get(recurrent_initializer)
self.attentive_initializer = initializers.get(attentive_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.unit_forget_bias = unit_forget_bias
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
self.attentive_regularizer = regularizers.get(attentive_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.recurrent_constraint = constraints.get(recurrent_constraint)
self.attentive_constraint = constraints.get(attentive_constraint)
self.bias_constraint = constraints.get(bias_constraint)
if K.backend() == 'theano' and (dropout or recurrent_dropout):
warnings.warn(
'RNN dropout is no longer supported with the Theano backend '
'due to technical limitations. '
'You can either set `dropout` and `recurrent_dropout` to 0, '
'or use the TensorFlow backend.')
dropout = 0.
recurrent_dropout = 0.
self.dropout = min(1., max(0., dropout))
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
self.attentive_dropout = min(1., max(0., attentive_dropout))
self.state_size = (self.filters, self.filters)
self._dropout_mask = None
self._recurrent_dropout_mask = None
self._attentive_dropout_mask = None
def build(self, input_shape):
if self.data_format == 'channels_first':
channel_axis = 1
else:
channel_axis = -1
if input_shape[channel_axis] is None:
raise ValueError('The channel dimension of the inputs '
'should be defined. Found `None`.')
input_dim = input_shape[channel_axis]
kernel_shape = self.kernel_size + (input_dim, self.filters * 4)
self.kernel_shape = kernel_shape
print('kernel_shape', kernel_shape)
recurrent_kernel_shape = self.kernel_size + (self.filters, self.filters * 4)
input_attentive_kernel_shape = self.attentive_kernel_size + (input_dim, self.attentive_filters)
hidden_attentive_kernel_shape = self.attentive_kernel_size + (self.filters, self.attentive_filters)
squeeze_attentive_kernel_shape = self.attentive_kernel_size + (self.attentive_filters, 1)
self.kernel = self.add_weight(shape=kernel_shape,
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
self.recurrent_kernel = self.add_weight(
shape=recurrent_kernel_shape,
initializer=self.recurrent_initializer,
name='recurrent_kernel',
regularizer=self.recurrent_regularizer,
constraint=self.recurrent_constraint)
self.input_attentive_kernel = self.add_weight(
shape=input_attentive_kernel_shape,
initializer=self.attentive_initializer,
name='input_attentive_kernel',
regularizer=self.attentive_regularizer,
constraint=self.attentive_constraint)
self.hidden_attentive_kernel = self.add_weight(
shape=hidden_attentive_kernel_shape,
initializer=self.attentive_initializer,
name='hidden_attentive_kernel',
regularizer=self.attentive_regularizer,
constraint=self.attentive_constraint)
self.squeeze_attentive_kernel = self.add_weight(
shape=squeeze_attentive_kernel_shape,
initializer=self.attentive_initializer,
name='squeeze_attentive_kernel',
regularizer=self.attentive_regularizer,
constraint=self.attentive_constraint)
if self.use_bias:
if self.unit_forget_bias:
def bias_initializer(_, *args, **kwargs):
return K.concatenate([
self.bias_initializer((self.filters,), *args, **kwargs),
initializers.Ones()((self.filters,), *args, **kwargs),
self.bias_initializer((self.filters * 2,), *args, **kwargs),
])
else:
bias_initializer = self.bias_initializer
self.bias = self.add_weight(
shape=(self.filters * 4,),
name='bias',
initializer=bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
self.attentive_bias = self.add_weight(
shape=(self.attentive_filters * 2,),
name='attentive_bias',
# Use the plain bias initializer here: when unit_forget_bias is True the
# closure above always returns a (filters * 4,) vector, which would not
# match this weight's shape.
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
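# Slice the fused kernels/biases into the four LSTM gates, ordered
# (input, forget, cell, output) along the last axis.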
self.kernel_i = self.kernel[:, :, :, :self.filters]
self.recurrent_kernel_i = self.recurrent_kernel[:, :, :, :self.filters]
self.kernel_f = self.kernel[:, :, :, self.filters: self.filters * 2]
self.recurrent_kernel_f = (self.recurrent_kernel[:, :, :, self.filters:
self.filters * 2])
self.kernel_c = self.kernel[:, :, :, self.filters * 2: self.filters * 3]
self.recurrent_kernel_c = (self.recurrent_kernel[:, :, :, self.filters * 2:
self.filters * 3])
self.kernel_o = self.kernel[:, :, :, self.filters * 3:]
self.recurrent_kernel_o = self.recurrent_kernel[:, :, :, self.filters * 3:]
if self.use_bias:
self.bias_i = self.bias[:self.filters]
self.bias_f = self.bias[self.filters: self.filters * 2]
self.bias_c = self.bias[self.filters * 2: self.filters * 3]
self.bias_o = self.bias[self.filters * 3:]
self.bias_wa = self.attentive_bias[:self.attentive_filters ]
self.bias_ua = self.attentive_bias[self.attentive_filters : self.attentive_filters * 2]
else:
self.bias_i = None
self.bias_f = None
self.bias_c = None
self.bias_o = None
self.built = True
def call(self, inputs, states, training=None):
if 0 < self.dropout < 1 and self._dropout_mask is None:
self._dropout_mask = _generate_dropout_mask(
K.ones_like(inputs),
self.dropout,
training=training,
count=4)
if (0 < self.recurrent_dropout < 1 and
self._recurrent_dropout_mask is None):
self._recurrent_dropout_mask = _generate_dropout_mask(
K.ones_like(states[1]),
self.recurrent_dropout,
training=training,
count=4)
# if (0 < self.attentive_dropout < 1 and self._attentive_dropout_mask is None):
# self._attentive_dropout_mask = _generate_dropout_mask(
# K.ones_like(inputs),
# self.attentive_dropout,
# training=training,
# count=4)
# dropout matrices for input units
dp_mask = self._dropout_mask
# dropout matrices for recurrent units
rec_dp_mask = self._recurrent_dropout_mask
# dropout matrices for attentive units
# att_dp_mask = self._attentive_dropout_mask
h_tm1 = states[0] # previous memory state
c_tm1 = states[1] # previous carry state
##### ATTENTION MECHANISM
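# (Comments added for clarity.) This is additive soft attention over space:
# below, h_and_x computes W_a*X_t + U_a*H_{t-1}, e squeezes the activated sum
# to a single-channel score map, and a is a softmax over all spatial
# locations; the input X_t is then reweighted by a across every channel.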
h_and_x = self.input_conv(h_tm1, self.hidden_attentive_kernel, self.bias_ua, padding='same') + self.input_conv(inputs, self.input_attentive_kernel, self.bias_wa, padding='same')
e = self.recurrent_conv(self.attentive_activation(h_and_x), self.squeeze_attentive_kernel)
a = K.reshape(K.softmax(K.batch_flatten(e)), K.shape(e))
inputs = inputs * K.repeat_elements(a, inputs.shape[-1], -1)
##### END OF ATTENTION MECHANISM
if 0 < self.dropout < 1.:
inputs_i = inputs * dp_mask[0]
inputs_f = inputs * dp_mask[1]
inputs_c = inputs * dp_mask[2]
inputs_o = inputs * dp_mask[3]
else:
inputs_i = inputs
inputs_f = inputs
inputs_c = inputs
inputs_o = inputs
if 0 < self.recurrent_dropout < 1.:
h_tm1_i = h_tm1 * rec_dp_mask[0]
h_tm1_f = h_tm1 * rec_dp_mask[1]
h_tm1_c = h_tm1 * rec_dp_mask[2]
h_tm1_o = h_tm1 * rec_dp_mask[3]
else:
h_tm1_i = h_tm1
h_tm1_f = h_tm1
h_tm1_c = h_tm1
h_tm1_o = h_tm1
x_i = self.input_conv(inputs_i, self.kernel_i, self.bias_i,
padding=self.padding)
x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f,
padding=self.padding)
x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c,
padding=self.padding)
x_o = self.input_conv(inputs_o, self.kernel_o, self.bias_o,
padding=self.padding)
h_i = self.recurrent_conv(h_tm1_i,
self.recurrent_kernel_i)
h_f = self.recurrent_conv(h_tm1_f,
self.recurrent_kernel_f)
h_c = self.recurrent_conv(h_tm1_c,
self.recurrent_kernel_c)
h_o = self.recurrent_conv(h_tm1_o,
self.recurrent_kernel_o)
i = self.recurrent_activation(x_i + h_i)
f = self.recurrent_activation(x_f + h_f)
c = f * c_tm1 + i * self.activation(x_c + h_c)
o = self.recurrent_activation(x_o + h_o)
h = o * self.activation(c)
if 0 < self.dropout + self.recurrent_dropout:
if training is None:
h._uses_learning_phase = True
return h, [h, c]
def input_conv(self, x, w, b=None, padding='valid'):
conv_out = K.conv2d(x, w, strides=self.strides,
padding=padding,
data_format=self.data_format,
dilation_rate=self.dilation_rate)
if b is not None:
conv_out = K.bias_add(conv_out, b,
data_format=self.data_format)
return conv_out
def recurrent_conv(self, x, w):
conv_out = K.conv2d(x, w, strides=(1, 1),
padding='same',
data_format=self.data_format)
return conv_out
def get_config(self):
config = {'filters': self.filters,
'attentive_filters': self.attentive_filters,
'kernel_size': self.kernel_size,
'attentive_kernel_size': self.attentive_kernel_size,
'strides': self.strides,
'padding': self.padding,
'data_format': self.data_format,
'dilation_rate': self.dilation_rate,
'activation': activations.serialize(self.activation),
'recurrent_activation': activations.serialize(
self.recurrent_activation),
'attentive_activation': activations.serialize(
self.attentive_activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(
self.kernel_initializer),
'recurrent_initializer': initializers.serialize(
self.recurrent_initializer),
'attentive_initializer': initializers.serialize(
self.attentive_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'unit_forget_bias': self.unit_forget_bias,
'kernel_regularizer': regularizers.serialize(
self.kernel_regularizer),
'recurrent_regularizer': regularizers.serialize(
self.recurrent_regularizer),
'attentive_regularizer': regularizers.serialize(
self.attentive_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'kernel_constraint': constraints.serialize(
self.kernel_constraint),
'recurrent_constraint': constraints.serialize(
self.recurrent_constraint),
'attentive_constraint': constraints.serialize(
self.attentive_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint),
'dropout': self.dropout,
'recurrent_dropout': self.recurrent_dropout,
'attentive_dropout': self.attentive_dropout}
base_config = super(AttentiveConvLSTM2DCell, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
class AttentiveConvLSTM2D(ConvRNN2D):
"""Convolutional LSTM.
It is similar to an LSTM layer, but the input transformations
and recurrent transformations are both convolutional.
Arguments:
filters: Integer, the dimensionality of the output space
(i.e. the number of output filters in the convolution).
kernel_size: An integer or tuple/list of n integers, specifying the
dimensions of the convolution window.
strides: An integer or tuple/list of n integers,
specifying the strides of the convolution.
Specifying any stride value != 1 is incompatible with specifying
any `dilation_rate` value != 1.
padding: One of `"valid"` or `"same"` (case-insensitive).
data_format: A string,
one of `channels_last` (default) or `channels_first`.
The ordering of the dimensions in the inputs.
`channels_last` corresponds to inputs with shape
`(batch, time, ..., channels)`
while `channels_first` corresponds to
inputs with shape `(batch, time, channels, ...)`.
It defaults to the `image_data_format` value found in your
Keras config file at `~/.keras/keras.json`.
If you never set it, then it will be "channels_last".
dilation_rate: An integer or tuple/list of n integers, specifying
the dilation rate to use for dilated convolution.
Currently, specifying any `dilation_rate` value != 1 is
incompatible with specifying any `strides` value != 1.
activation: Activation function to use.
If you don't specify anything, no activation is applied
(ie. "linear" activation: `a(x) = x`).
recurrent_activation: Activation function to use
for the recurrent step.
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix,
used for the linear transformation of the inputs.
recurrent_initializer: Initializer for the `recurrent_kernel`
weights matrix,
used for the linear transformation of the recurrent state.
bias_initializer: Initializer for the bias vector.
unit_forget_bias: Boolean.
If True, add 1 to the bias of the forget gate at initialization.
Use in combination with `bias_initializer="zeros"`.
This is recommended in [Jozefowicz et al.]
(http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix.
recurrent_regularizer: Regularizer function applied to
the `recurrent_kernel` weights matrix.
bias_regularizer: Regularizer function applied to the bias vector.
activity_regularizer: Regularizer function applied to
the output of the layer.
kernel_constraint: Constraint function applied to
the `kernel` weights matrix.
recurrent_constraint: Constraint function applied to
the `recurrent_kernel` weights matrix.
bias_constraint: Constraint function applied to the bias vector.
return_sequences: Boolean. Whether to return the last output
in the output sequence, or the full sequence.
go_backwards: Boolean (default False).
If True, process the input sequence backwards.
stateful: Boolean (default False). If True, the last state
for each sample at index i in a batch will be used as initial
state for the sample of index i in the following batch.
dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the recurrent state.
Input shape:
- if data_format='channels_first'
5D tensor with shape:
`(samples, time, channels, rows, cols)`
- if data_format='channels_last'
5D tensor with shape:
`(samples, time, rows, cols, channels)`
Output shape:
- if `return_sequences`
- if data_format='channels_first'
5D tensor with shape:
`(samples, time, filters, output_row, output_col)`
- if data_format='channels_last'
5D tensor with shape:
`(samples, time, output_row, output_col, filters)`
- else
- if data_format ='channels_first'
4D tensor with shape:
`(samples, filters, output_row, output_col)`
- if data_format='channels_last'
4D tensor with shape:
`(samples, output_row, output_col, filters)`
where output_row and output_col depend on the shape of the filter and
the padding
Raises:
ValueError: in case of invalid constructor arguments.
References:
- [Convolutional LSTM Network: A Machine Learning Approach for
Precipitation Nowcasting](http://arxiv.org/abs/1506.04214v1)
The current implementation does not include the feedback loop on the
cells output.
"""
def __init__(self,
filters,
attentive_filters,
kernel_size,
attentive_kernel_size,
strides=(1, 1),
padding='valid',
data_format=None,
dilation_rate=(1, 1),
activation='tanh',
recurrent_activation='hard_sigmoid',
attentive_activation='tanh',
use_bias=True,
kernel_initializer='glorot_uniform',
recurrent_initializer='orthogonal',
attentive_initializer='zeros',
bias_initializer='zeros',
unit_forget_bias=True,
kernel_regularizer=None,
recurrent_regularizer=None,
attentive_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
recurrent_constraint=None,
attentive_constraint=None,
bias_constraint=None,
return_sequences=False,
go_backwards=False,
stateful=False,
dropout=0.,
recurrent_dropout=0.,
attentive_dropout=0.,
**kwargs):
cell = AttentiveConvLSTM2DCell(filters=filters,
attentive_filters=attentive_filters,
kernel_size=kernel_size,
attentive_kernel_size=attentive_kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
recurrent_activation=recurrent_activation,
attentive_activation=attentive_activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
recurrent_initializer=recurrent_initializer,
attentive_initializer=attentive_initializer,
bias_initializer=bias_initializer,
unit_forget_bias=unit_forget_bias,
kernel_regularizer=kernel_regularizer,
recurrent_regularizer=recurrent_regularizer,
attentive_regularizer=attentive_regularizer,
bias_regularizer=bias_regularizer,
kernel_constraint=kernel_constraint,
recurrent_constraint=recurrent_constraint,
attentive_constraint=attentive_constraint,
bias_constraint=bias_constraint,
dropout=dropout,
recurrent_dropout=recurrent_dropout,
attentive_dropout=attentive_dropout)
super(AttentiveConvLSTM2D, self).__init__(cell,
return_sequences=return_sequences,
go_backwards=go_backwards,
stateful=stateful,
**kwargs)
self.activity_regularizer = regularizers.get(activity_regularizer)
def call(self, inputs, mask=None, training=None, initial_state=None):
return super(AttentiveConvLSTM2D, self).call(inputs,
mask=mask,
training=training,
initial_state=initial_state)
@property
def filters(self):
return self.cell.filters
@property
def attentive_filters(self):
return self.cell.attentive_filters
@property
def kernel_size(self):
return self.cell.kernel_size
@property
def attentive_kernel_size(self):
return self.cell.attentive_kernel_size
@property
def strides(self):
return self.cell.strides
@property
def padding(self):
return self.cell.padding
@property
def data_format(self):
return self.cell.data_format
@property
def dilation_rate(self):
return self.cell.dilation_rate
@property
def activation(self):
return self.cell.activation
@property
def recurrent_activation(self):
return self.cell.recurrent_activation
@property
def attentive_activation(self):
return self.cell.attentive_activation
@property
def use_bias(self):
return self.cell.use_bias
@property
def kernel_initializer(self):
return self.cell.kernel_initializer
@property
def recurrent_initializer(self):
return self.cell.recurrent_initializer
@property
def attentive_initializer(self):
return self.cell.attentive_initializer
@property
def bias_initializer(self):
return self.cell.bias_initializer
@property
def unit_forget_bias(self):
return self.cell.unit_forget_bias
@property
def kernel_regularizer(self):
return self.cell.kernel_regularizer
@property
def recurrent_regularizer(self):
return self.cell.recurrent_regularizer
@property
def attentive_regularizer(self):
return self.cell.attentive_regularizer
@property
def bias_regularizer(self):
return self.cell.bias_regularizer
@property
def kernel_constraint(self):
return self.cell.kernel_constraint
@property
def recurrent_constraint(self):
return self.cell.recurrent_constraint
@property
def attentive_constraint(self):
return self.cell.attentive_constraint
@property
def bias_constraint(self):
return self.cell.bias_constraint
@property
def dropout(self):
return self.cell.dropout
@property
def recurrent_dropout(self):
return self.cell.recurrent_dropout
@property
def attentive_dropout(self):
return self.cell.attentive_dropout
def get_config(self):
config = {'filters': self.filters,
'attentive_filters': self.attentive_filters,
'kernel_size': self.kernel_size,
'attentive_kernel_size': self.attentive_kernel_size,
'strides': self.strides,
'padding': self.padding,
'data_format': self.data_format,
'dilation_rate': self.dilation_rate,
'activation': activations.serialize(self.activation),
'recurrent_activation': activations.serialize(
self.recurrent_activation),
'attentive_activation': activations.serialize(
self.attentive_activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(
self.kernel_initializer),
'recurrent_initializer': initializers.serialize(
self.recurrent_initializer),
'attentive_initializer': initializers.serialize(
self.attentive_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'unit_forget_bias': self.unit_forget_bias,
'kernel_regularizer': regularizers.serialize(
self.kernel_regularizer),
'recurrent_regularizer': regularizers.serialize(
self.recurrent_regularizer),
'attentive_regularizer': regularizers.serialize(
self.attentive_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'activity_regularizer': regularizers.serialize(
self.activity_regularizer),
'kernel_constraint': constraints.serialize(
self.kernel_constraint),
'recurrent_constraint': constraints.serialize(
self.recurrent_constraint),
'attentive_constraint': constraints.serialize(
self.attentive_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint),
'dropout': self.dropout,
'recurrent_dropout': self.recurrent_dropout,
'attentive_dropout': self.attentive_dropout}
base_config = super(AttentiveConvLSTM2D, self).get_config()
del base_config['cell']
return dict(list(base_config.items()) + list(config.items()))
@classmethod
def from_config(cls, config):
return cls(**config)
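# Minimal usage sketch (not part of the original file; shapes are assumed).
# The layer consumes 5D sequences (samples, time, rows, cols, channels) with
# data_format='channels_last'; with return_sequences=False it emits a 4D map.
if __name__ == '__main__':
    from keras.layers import Input
    from keras.models import Model
    seq = Input(shape=(5, 30, 40, 512))
    att = AttentiveConvLSTM2D(filters=256, attentive_filters=256,
                              kernel_size=(3, 3), attentive_kernel_size=(3, 3),
                              padding='same', return_sequences=False)(seq)
    Model(seq, att).summary()  # output map: (None, 30, 40, 256)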

RecallNet/src/cb.py Normal file

@@ -0,0 +1,80 @@
import keras
import matplotlib.pyplot as plt
from IPython.display import clear_output
import os
from keras.callbacks import ModelCheckpoint
import numpy as np
import math
class Unfreeze(keras.callbacks.Callback):
def __init__(self,it_to_unfreeze):
self.it_to_unfreeze = it_to_unfreeze
self.c=0
self.frozen=True
def on_batch_end(self, batch, logs=None):
self.c+=1
if self.c >= self.it_to_unfreeze and self.frozen:  # original condition was inverted and would unfreeze on the first batch
print('Iteration %d reached: UNFREEZING ENCODER' % self.c)
self.frozen=False
for layer in self.model.layers:
layer.trainable=True
class InteractivePlot(keras.callbacks.Callback):
def __init__(self):
pass
def on_train_begin(self, logs={}):
self.losses = []
self.logs = []
self.batchnr = 0
self.icount = 0
def on_train_end(self, logs={}):
pass
def on_epoch_end(self, epoch, logs={}):
self.batchnr = 0
loss_train = logs.get('loss')
self.losses.append(loss_train)
self.icount+=1
clear_output(wait=True)
plt.figure(figsize=(14,10))
train_vals = [self.losses]
desc = ['loss']
for i in range(len(train_vals)):
#plt.subplot(2, 3, i+1)
plt.plot(range(self.icount), train_vals[i], label=desc[i])
plt.legend()
#plt.savefig(self.logfile.replace('.txt', '.png'), bbox_inches='tight', format='png')
plt.show()
def on_batch_end(self, batch, logs=None):
self.batchnr+=1
if self.batchnr % 10 == 0:
self.on_epoch_end(epoch=0, logs=logs)
def ckpt_callback(model_name, dataset, l_str, bs, extra_str='',
period=1, save_weights_only=True,
ckpt_folder_path = '../../predimportance_shared/models/ckpt/'):
path = os.path.join(ckpt_folder_path, model_name)
if not os.path.exists(path):
os.makedirs(path)
filepath = os.path.join(path, model_name+'_'+dataset+'_'+l_str+'_bs'+str(bs)+extra_str+'_ep{epoch:02d}_valloss{val_loss:.4f}.hdf5')
# Honor the function's arguments instead of hardcoding save_weights_only/period.
cb_chk = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_weights_only=save_weights_only, period=period)
return cb_chk
def step_decay(init_lr = 0.0001, drop = 0.1, epochs_drop = 3.0):
def inner(epoch):
lrate = init_lr * math.pow(drop, math.floor((1+epoch)/epochs_drop))
if not (epoch+1)%epochs_drop:
print('Reducing lr. New lr is:', lrate)
return lrate
return inner
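# Usage sketch (not part of the original file; model/dataset/loss names are
# placeholders): step_decay returns a schedule for LearningRateScheduler, and
# the callbacks above combine like this.
# from keras.callbacks import LearningRateScheduler
# callbacks = [Unfreeze(it_to_unfreeze=500),
#              InteractivePlot(),
#              LearningRateScheduler(step_decay(init_lr=1e-4, drop=0.1, epochs_drop=3.0)),
#              ckpt_callback('umsi', 'salicon', 'kl', bs=8)]
# model.fit_generator(train_gen, epochs=10, callbacks=callbacks)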


@@ -0,0 +1,566 @@
import numpy as np
import keras
import sys
import os
from keras.layers import Layer, Input, Multiply, Dropout,DepthwiseConv2D, TimeDistributed, LSTM, Activation, Lambda, Conv2D, Dense, GlobalAveragePooling2D, MaxPooling2D, ZeroPadding2D, UpSampling2D, BatchNormalization, Concatenate
import keras.backend as K
from keras.models import Model
import tensorflow as tf
from keras.utils import Sequence
import cv2
import scipy.io
import math
from attentive_convlstm_new import AttentiveConvLSTM2D
from dcn_resnet_new import dcn_resnet
from gaussian_prior_new import LearningPrior
from sal_imp_utilities import *
from multiduration_models import decoder_block_timedist
from xception_custom import Xception_wrapper
from keras.applications import keras_modules_injection
def xception_cl(input_shape = (None, None, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
freeze_enc=False,
dil_rate = (2,2),
freeze_cl=True,
append_classif=True,
num_classes=5):
"""Xception with classification capabilities"""
inp = Input(shape=input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
### CLASSIFIER ###
cl = GlobalAveragePooling2D(name='gap_cl')(xception.output)
cl = Dense(512,name='dense_cl')(cl)
cl = Dropout(0.3, name='dropout_cl')(cl)
cl = Dense(num_classes, activation='softmax', name='dense_cl_out')(cl)
## DECODER ##
outs_dec = decoder_block(xception.output, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=512, prefix='decoder')
outs_final = [outs_dec]*n_outs
if append_classif:
outs_final.append(cl)
# Building model
m = Model(inp, outs_final) # Last element of outs_final is classification vector
if verbose:
m.summary()
if freeze_cl:
print('Freezing classification dense layers')
m.get_layer('dense_cl').trainable = False
m.get_layer('dense_cl_out').trainable = False
return m
def xception_cl_fus(input_shape=(None, None, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
dil_rate=(2,2),
freeze_enc=False,
freeze_cl=True,
internal_filts=256,
num_classes=5,
dp=0.3):
"""Xception with classification capabilities that fuses representations from both tasks"""
inp = Input(shape=input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
### GLOBAL FEATURES ###
g_n = global_net(xception.output, nfilts=internal_filts, dp=dp)
if print_shapes: print('g_n shapes:', g_n.shape)
### CLASSIFIER ###
# We potentially need another layer here
out_classif = Dense(num_classes, activation='softmax', name='out_classif')(g_n)
### ASPP (MID LEVEL FEATURES) ###
aspp_out = aspp(xception.output, internal_filts)  # fixed typo: was app(...), cf. the identical call in xception_cl_fus_aspp
if print_shapes: print('aspp out shapes:', aspp_out.shape)
### FUSION ###
dense_f = Dense(internal_filts, name = 'dense_fusion')(g_n)
if print_shapes: print('dense_f shapes:', dense_f.shape)
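# The Lambda below tiles the global feature vector over the spatial grid of
# aspp_out, i.e. (batch, C) -> (batch, H, W, C), so scene-level features can
# be concatenated channel-wise with the ASPP feature map.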
reshap = Lambda(lambda x: K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(x, axis=1), K.int_shape(aspp_out)[2], axis=1), axis=1), K.int_shape(aspp_out)[1], axis=1),
lambda s: (s[0], K.int_shape(aspp_out)[1], K.int_shape(aspp_out)[2], s[1]))(dense_f)
if print_shapes: print('after lambda shapes:', reshap.shape)
conc = Concatenate()([aspp_out,reshap])
### Projection ###
x = Conv2D(internal_filts, (1, 1), padding='same', use_bias=False, name='concat_projection')(conc)
x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
x = Activation('relu')(x)
x = Dropout(dp)(x)
### DECODER ###
outs_dec = decoder_block(x, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=internal_filts, dp=dp)
outs_final = [outs_dec]*n_outs
outs_final.append(out_classif)
# Building model
m = Model(inp, outs_final) # Last element of outs_final is classification vector
if freeze_cl:
m.get_layer('out_classif').trainable = False
# for l in g_n.layers:
# l.trainable=False
if verbose:
m.summary()
return m
def xception_cl_fus_aspp(input_shape=(None, None, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
dil_rate=(2,2),
freeze_enc=False,
freeze_cl=True,
internal_filts=256,
num_classes=6,
dp=0.3,
lambda_layer_for_save=False):
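    """Same as xception_cl_fus, but uses the BN-based aspp() block instead of app(),
    and optionally swaps in a save-friendly fusion Lambda (lambda_layer_for_save)."""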
inp = Input(shape=input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
### GLOBAL FEATURES ###
g_n = global_net(xception.output, nfilts=internal_filts, dp=dp)
if print_shapes: print('g_n shapes:', g_n.shape)
### CLASSIFIER ###
# We potentially need another layer here
out_classif = Dense(num_classes, activation='softmax', name='out_classif')(g_n)
### ASPP (MID LEVEL FEATURES) ###
aspp_out = aspp(xception.output, internal_filts)
if print_shapes: print('aspp out shapes:', aspp_out.shape)
### FUSION ###
dense_f = Dense(internal_filts, name = 'dense_fusion')(g_n)
if print_shapes: print('dense_f shapes:', dense_f.shape)
if not lambda_layer_for_save:
reshap = Lambda(lambda x: K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(x, axis=1), K.int_shape(aspp_out)[2], axis=1), axis=1), K.int_shape(aspp_out)[1], axis=1),
lambda s: (s[0], K.int_shape(aspp_out)[1], K.int_shape(aspp_out)[2], s[1]))(dense_f)
else: # Use this lambda layer if you want to be able to use model.save() (set lambda_layer_for_save to True)
print("Using lambda layer adapted to model.save()")
reshap = Lambda(lambda x: K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(x, axis=1), 40, axis=1), axis=1), 30, axis=1),
lambda s: (s[0], 30, 40, s[1]))(dense_f)
# reshap = FusionReshape()(dense_f)
if print_shapes: print('after lambda shapes:', reshap.shape)
conc = Concatenate()([aspp_out,reshap])
### Projection ###
x = Conv2D(internal_filts, (1, 1), padding='same', use_bias=False, name='concat_projection')(conc)
x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
x = Activation('relu')(x)
x = Dropout(dp)(x)
### DECODER ###
outs_dec = decoder_block(x, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=internal_filts, dp=dp)
outs_final = [outs_dec]*n_outs
outs_final.append(out_classif)
# Building model
m = Model(inp, outs_final,name = 'xception_cl_fus_aspp') # Last element of outs_final is classification vector
if freeze_cl:
m.get_layer('out_classif').trainable = False
# for l in g_n.layers:
# l.trainable=False
if verbose:
m.summary()
return m
def umsi(input_shape=(None, None, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
dil_rate=(2,2),
freeze_enc=False,
freeze_cl=True,
internal_filts=256,
num_classes=6,
dp=0.3,
lambda_layer_for_save=False):
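    """Like xception_cl_fus_aspp, but decodes with skip connections from the
    Xception encoder (decoder_with_skip) instead of the plain decoder_block."""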
inp = Input(shape=input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
# xception.summary()
skip_layers = ['block3_sepconv2_bn','block1_conv1_act']
# sizes (for 240x320 inputs), in list order: 59x79x256, 119x159x32
skip_feature_maps = [xception.get_layer(n).output for n in skip_layers]
### GLOBAL FEATURES ###
g_n = global_net(xception.output, nfilts=internal_filts, dp=dp)
if print_shapes: print('g_n shapes:', g_n.shape)
### CLASSIFIER ###
# We potentially need another layer here
out_classif = Dense(num_classes, activation='softmax', name='out_classif')(g_n)
### ASPP (MID LEVEL FEATURES) ###
aspp_out = aspp(xception.output, internal_filts)
if print_shapes: print('aspp out shapes:', aspp_out.shape)
### FUSION ###
dense_f = Dense(internal_filts, name = 'dense_fusion')(g_n)
if print_shapes: print('dense_f shapes:', dense_f.shape)
if not lambda_layer_for_save:
reshap = Lambda(lambda x: K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(x, axis=1), K.int_shape(aspp_out)[2], axis=1), axis=1), K.int_shape(aspp_out)[1], axis=1),
lambda s: (s[0], K.int_shape(aspp_out)[1], K.int_shape(aspp_out)[2], s[1]))(dense_f)
else: # Use this lambda layer if you want to be able to use model.save() (set lambda_layer_for_save to True)
print("Using lambda layer adapted to model.save()")
reshap = Lambda(lambda x: K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(x, axis=1), 40, axis=1), axis=1), 30, axis=1),
lambda s: (s[0], 30, 40, s[1]))(dense_f)
# reshap = FusionReshape()(dense_f)
if print_shapes: print('after lambda shapes:', reshap.shape)
conc = Concatenate()([aspp_out,reshap])
### Projection ###
x = Conv2D(internal_filts, (1, 1), padding='same', use_bias=False, name='concat_projection')(conc)
x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
x = Activation('relu')(x)
x = Dropout(dp)(x)
### DECODER ###
# outs_dec = decoder_block(x, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=internal_filts, dp=dp)
outs_dec = decoder_with_skip(x,
skip_feature_maps,
print_shapes=print_shapes,
dec_filt=internal_filts,
dp=dp)
outs_final = [outs_dec]*n_outs
outs_final.append(out_classif)
# Building model
m = Model(inp, outs_final, name = 'umsi') # Last element of outs_final is classification vector
if freeze_cl:
m.get_layer('out_classif').trainable = False
# for l in g_n.layers:
# l.trainable=False
if verbose:
m.summary()
return m
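# --- Usage sketch (illustrative) ---
# UMSI with one saliency output and the 6-way classifier; kl_cc_combined is
# defined in this repo's losses module, the optimizer choice is a placeholder.
#
#   model = umsi(input_shape=(shape_r, shape_c, 3), n_outs=1)
#   model.compile(optimizer='adam',
#                 loss=[kl_cc_combined, 'categorical_crossentropy'],
#                 loss_weights=[1., 1.])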
def xception_cl_fus_skipdec(input_shape=(None, None, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
dil_rate=(2,2),
freeze_enc=False,
freeze_cl=True,
internal_filts=256,
num_classes=5,
dp=0.3):
inp = Input(shape=input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
xception.summary()
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
### GLOBAL FEATURES ###
g_n = global_net(xception.output, nfilts=internal_filts, dp=dp)
if print_shapes: print('g_n shapes:', g_n.shape)
### CLASSIFIER ###
# We potentially need another layer here
out_classif = Dense(num_classes, activation='softmax', name='out_classif')(g_n)
### ASPP (MID LEVEL FEATURES) ###
aspp_out = aspp(xception.output, internal_filts)
if print_shapes: print('aspp out shapes:', aspp_out.shape)
### FUSION ###
dense_f = Dense(internal_filts, name = 'dense_fusion')(g_n)
if print_shapes: print('dense_f shapes:', dense_f.shape)
reshap = Lambda(lambda x: K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(x, axis=1), K.int_shape(aspp_out)[2], axis=1), axis=1), K.int_shape(aspp_out)[1], axis=1),
lambda s: (s[0], K.int_shape(aspp_out)[1], K.int_shape(aspp_out)[2], s[1]))(dense_f)
if print_shapes: print('after lambda shapes:', reshap.shape)
conc = Concatenate()([aspp_out,reshap])
### Projection ###
x = Conv2D(internal_filts, (1, 1), padding='same', use_bias=False, name='concat_projection')(conc)
x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
x = Activation('relu')(x)
x = Dropout(dp)(x)
### DECODER ###
outs_dec = decoder_block(x, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=internal_filts, dp=dp)
# outs_dec = decoder_with_skip(x, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=internal_filts, dp=dp)
outs_final = [outs_dec]*n_outs
outs_final.append(out_classif)
# Building model
m = Model(inp, outs_final) # Last element of outs_final is classification vector
if freeze_cl:
m.get_layer('out_classif').trainable = False
# for l in g_n.layers:
# l.trainable=False
if verbose:
m.summary()
return m
def global_net(x, nfilts=512, dp=0.1, print_shapes = True):
x = Conv2D(nfilts, (3, 3), strides=3, padding='same', use_bias=False, name='global_conv')(x)
if print_shapes: print('Shape after global net conv:', x.shape)
x = BatchNormalization(name='global_BN',epsilon=1e-5)(x)
x = Activation('relu')(x)
x = Dropout(dp)(x)
x = GlobalAveragePooling2D()(x)
x = Dense(nfilts, name='global_dense')(x)
x = Dropout(dp)(x)
return x
def app(x, nfilts=256, prefix='app', dils=[6,12,18]):
x1 = Conv2D(nfilts, 1, padding='same', activation='relu', dilation_rate=(1,1), name=prefix+'_c1x1')(x)
x2 = Conv2D(nfilts, 3, padding='same', activation='relu', dilation_rate=(dils[0],dils[0]), name=prefix+'_c3x3d'+str(dils[0]))(x)
x3 = Conv2D(nfilts, 3, padding='same', activation='relu', dilation_rate=(dils[1],dils[1]), name=prefix+'_c3x3d'+str(dils[1]))(x)
x4 = Conv2D(nfilts, 3, padding='same', activation='relu', dilation_rate=(dils[2],dils[2]), name=prefix+'_c3x3d'+str(dils[2]))(x)
x = Concatenate()([x1,x2,x3,x4])
return x
def aspp(x, nfilts=256, prefix='aspp', dils=[6,12,18]):
x1 = Conv2D(nfilts, (1, 1), padding='same', use_bias=False, name=prefix+'_csep0')(x)
x1 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(x1)
x1 = Activation('relu', name='aspp0_activation')(x1)
# rate = 6
x2 = SepConv_BN(x, nfilts, prefix+'_csepd'+str(dils[0]), rate=dils[0], depth_activation=True, epsilon=1e-5)
# rate = 12 (24)
x3 = SepConv_BN(x, nfilts, prefix+'_csepd'+str(dils[1]),rate=dils[1], depth_activation=True, epsilon=1e-5)
# rate = 18 (36)
x4 = SepConv_BN(x, nfilts, prefix+'_csepd'+str(dils[2]),rate=dils[2], depth_activation=True, epsilon=1e-5)
x = Concatenate()([x1,x2,x3,x4])
return x
def decoder_with_skip(x, skip_tensors, dil_rate=1, print_shapes=True, dec_filt=1024, dp=0.2, ups=16, prefix='decskip'):
# sizes of input skip connections from Xception (for 240x320 inputs), in list order: 59x79x256, 119x159x32
for i, sk in enumerate(skip_tensors, start=1):
# Upsample
x = UpSampling2D((2,2), interpolation='bilinear', name=prefix+'_ups%d'%i)(x)
if x.shape[1] != sk.shape[1] or x.shape[2] != sk.shape[2]:
x = Lambda(lambda t: tf.image.resize(t, (K.int_shape(sk)[1], K.int_shape(sk)[2])))(x)
# Concatenate
x = Concatenate()([x, sk])
# Convolve to reduce feature dimensionality
x = Conv2D(dec_filt//2**i, (1, 1), padding='same', use_bias=False, name=prefix+'_proj_%d'%i)(x)
x = BatchNormalization(name=prefix+'_bn_%d'%i, epsilon=1e-5)(x)
x = Activation('relu', name=prefix+'_act_%d'%i)(x)
# Convolve with depth sep convs
x = SepConv_BN(x,
dec_filt//2**i,
kernel_size=3,
depth_activation=True,
epsilon=1e-5,
rate=dil_rate,
prefix=prefix+'_sepconvA_%d'%i)
x = SepConv_BN(x,
dec_filt//2**i,
kernel_size=3,
depth_activation=True,
epsilon=1e-5,
rate=dil_rate,
prefix=prefix+'_sepconvB_%d'%i)
x = Dropout(dp, name=prefix+'_dp%d'%i)(x)
print("shape after block %d of dec:"%i, x.shape)
# Upsampling and normal conv
# i+=1
# x = UpSampling2D((2,2), interpolation='bilinear', name=prefix+'_ups_prefinal')(x)
# x = Conv2D(dec_filt//2**i, (3, 3), padding='same', use_bias=True, name=prefix+'_conv_%d'%i)(x)
# x = BatchNormalization(name=prefix+'_bn_%d'%i, epsilon=1e-5)(x)
# x = Activation('relu', name=prefix+'_act_%d'%i)(x)
# Final upsample to get to desired output size (480x640)
x = UpSampling2D((4,4), interpolation='bilinear', name=prefix+'_ups_final')(x)
if x.shape[1] != shape_r_out or x.shape[2] != shape_c_out:
x = Lambda(lambda t: tf.image.resize(t, (shape_r_out, shape_c_out)))(x)
if print_shapes: print('Shape after last ups and resize:',x.shape)
# Final conv to get to a heatmap
x = Conv2D(1, kernel_size=1, padding='same', activation='relu', name=prefix+'_c_out')(x)
if print_shapes: print('Shape after 1x1 conv:',x.shape)
return x
def decoder_block(x, dil_rate=(2,2), print_shapes=True, dec_filt=1024, dp=0.2, ups=16, prefix='dec'):
# Dilated convolutions
x = Conv2D(dec_filt, 3, padding='same', activation='relu', dilation_rate=dil_rate, name=prefix+'_c1')(x)
x = Conv2D(dec_filt, 3, padding='same', activation='relu', dilation_rate=dil_rate, name=prefix+'_c2')(x)
x = Dropout(dp, name=prefix+'_dp1')(x)
x = UpSampling2D((2,2), interpolation='bilinear', name=prefix+'_ups1')(x)
x = Conv2D(dec_filt//2, 3, padding='same', activation='relu', dilation_rate=dil_rate, name=prefix+'_c3')(x)
x = Conv2D(dec_filt//2, 3, padding='same', activation='relu', dilation_rate=dil_rate, name=prefix+'_c4')(x)
x = Dropout(dp, name=prefix+'_dp2')(x)
x = UpSampling2D((2,2), interpolation='bilinear', name=prefix+'_ups2')(x)
x = Conv2D(dec_filt//4, 3, padding='same', activation='relu', dilation_rate=dil_rate, name=prefix+'_c5')(x)
x = Dropout(dp, name=prefix+'_dp3')(x)
x = UpSampling2D((4,4), interpolation='bilinear', name=prefix+'_ups3')(x)
if print_shapes: print('Shape after last ups:',x.shape)
# Final conv to get to a heatmap
x = Conv2D(1, kernel_size=1, padding='same', activation='relu', name=prefix+'_c_out')(x)
if print_shapes: print('Shape after 1x1 conv:',x.shape)
return x
class FusionReshape(Layer):
def __init__(self, **kwargs):
super(FusionReshape, self).__init__(**kwargs)
def build(self, input_shape):
super(FusionReshape, self).build(input_shape)
def call(self, x):
return K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(x, axis=1), 40, axis=1), axis=1), 30, axis=1)
def compute_output_shape(self, input_shape):
return (input_shape[0], 30, 40, input_shape[1])
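# FusionReshape is a serializable alternative to the fusion Lambda used in the
# models above: being a named Layer subclass with a fixed 30x40 output grid, it
# survives model.save() without the lambda_layer_for_save workaround.
# Illustrative use (variable names hypothetical):
#
#   reshap = FusionReshape()(dense_f)
#   m = load_model('model.h5', custom_objects={'FusionReshape': FusionReshape})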
##### DEEPLAB V3 CODE #####
def SepConv_BN(x, filters, prefix='scb', stride=1, kernel_size=3, rate=1,
depth_activation=False, epsilon=1e-3):
""" SepConv with BN between depthwise & pointwise. Optionally add activation after BN
Implements right "same" padding for even kernel sizes
Args:
x: input tensor
filters: num of filters in pointwise convolution
prefix: prefix before name
stride: stride at depthwise conv
kernel_size: kernel size for depthwise convolution
rate: atrous rate for depthwise convolution
depth_activation: flag to use activation between depthwise & pointwise convs
epsilon: epsilon to use in BN layer
"""
if stride == 1:
depth_padding = 'same'
else:
kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
pad_total = kernel_size_effective - 1
pad_beg = pad_total // 2
pad_end = pad_total - pad_beg
x = ZeroPadding2D((pad_beg, pad_end))(x)
depth_padding = 'valid'
if not depth_activation:
x = Activation('relu')(x)
x = DepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
x = BatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
if depth_activation:
x = Activation('relu')(x)
x = Conv2D(filters, (1, 1), padding='same',
use_bias=False, name=prefix + '_pointwise')(x)
x = BatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
if depth_activation:
x = Activation('relu')(x)
return x
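# Illustrative call, mirroring how aspp() above uses this block: an atrous
# separable convolution with rate 6 and activations around both convs.
#
#   y = SepConv_BN(x, 256, prefix='aspp_csepd6', rate=6,
#                  depth_activation=True, epsilon=1e-5)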

@@ -0,0 +1,239 @@
from __future__ import print_function
from __future__ import absolute_import
from keras.layers import Input, Activation, Add
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import BatchNormalization
from keras.models import Model
from keras import backend as K
from keras.utils.data_utils import get_file
# TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
'releases/download/v0.2/'
'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')
def identity_block(input_tensor, kernel_size, filters, stage, block):
"""The identity block is the block that has no conv layer at shortcut.
# Arguments
input_tensor: input tensor
kernel_size: default 3, the kernel size of
middle conv layer at main path
filters: list of integers, the filters of 3 conv layer at main path
stage: integer, current stage label, used for generating layer names
block: 'a','b'..., current block label, used for generating layer names
# Returns
Output tensor for the block.
"""
filters1, filters2, filters3 = filters
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2D(filters1, (1, 1),
kernel_initializer='he_normal',
name=conv_name_base + '2a')(input_tensor)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)
x = Conv2D(filters2, kernel_size,
padding='same',
kernel_initializer='he_normal',
name=conv_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)
x = Conv2D(filters3, (1, 1),
kernel_initializer='he_normal',
name=conv_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = Add()([x, input_tensor])
x = Activation('relu')(x)
return x
def conv_block(input_tensor,
kernel_size,
filters,
stage,
block,
strides=(2, 2)):
"""A block that has a conv layer at shortcut.
# Arguments
input_tensor: input tensor
kernel_size: default 3, the kernel size of
middle conv layer at main path
filters: list of integers, the filters of 3 conv layer at main path
stage: integer, current stage label, used for generating layer names
block: 'a','b'..., current block label, used for generating layer names
strides: Strides for the first conv layer in the block.
# Returns
Output tensor for the block.
Note that from stage 3,
the first conv layer at main path is with strides=(2, 2)
And the shortcut should have strides=(2, 2) as well
"""
filters1, filters2, filters3 = filters
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2D(filters1, (1, 1), strides=strides,
kernel_initializer='he_normal',
name=conv_name_base + '2a')(input_tensor)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)
x = Conv2D(filters2, kernel_size, padding='same',
kernel_initializer='he_normal',
name=conv_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)
x = Conv2D(filters3, (1, 1),
kernel_initializer='he_normal',
name=conv_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
shortcut = Conv2D(filters3, (1, 1), strides=strides,
kernel_initializer='he_normal',
name=conv_name_base + '1')(input_tensor)
shortcut = BatchNormalization(
axis=bn_axis, name=bn_name_base + '1')(shortcut)
x = Add()([x, shortcut])
x = Activation('relu')(x)
return x
def conv_block_atrous(input_tensor, kernel_size, filters, stage, block, atrous_rate=(2, 2)):
nb_filter1, nb_filter2, nb_filter3 = filters
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a')(input_tensor)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)
x = Conv2D(nb_filter2, kernel_size, padding='same',dilation_rate=atrous_rate,name=conv_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)
x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
shortcut = Conv2D(nb_filter3, (1, 1), name=conv_name_base + '1')(input_tensor)
shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)
x = Add()([x, shortcut])
x = Activation('relu')(x)
return x
def identity_block_atrous(input_tensor, kernel_size, filters, stage, block, atrous_rate=(2, 2)):
nb_filter1, nb_filter2, nb_filter3 = filters
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a')(input_tensor)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)
x = Conv2D(nb_filter2, kernel_size, dilation_rate=atrous_rate,
padding='same', name=conv_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)
x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = Add()([x, input_tensor])
x = Activation('relu')(x)
return x
def dcn_resnet(input_tensor=None):
input_shape = (None, None, 3)
if input_tensor is None:
img_input = Input(shape=input_shape)
else:
if not K.is_keras_tensor(input_tensor):
img_input = Input(tensor=input_tensor)
else:
img_input = input_tensor
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
# conv_1
x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
x = Conv2D(64, (7, 7),
strides=(2, 2),
padding='valid',
kernel_initializer='he_normal',
name='conv1')(x)
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = Activation('relu')(x)
x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)
# conv_2
x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
# conv_3
x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', strides=(2, 2))
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
# conv_4
x = conv_block_atrous(x, 3, [256, 256, 1024], stage=4, block='a', atrous_rate=(2, 2))
x = identity_block_atrous(x, 3, [256, 256, 1024], stage=4, block='b', atrous_rate=(2, 2))
x = identity_block_atrous(x, 3, [256, 256, 1024], stage=4, block='c', atrous_rate=(2, 2))
x = identity_block_atrous(x, 3, [256, 256, 1024], stage=4, block='d', atrous_rate=(2, 2))
x = identity_block_atrous(x, 3, [256, 256, 1024], stage=4, block='e', atrous_rate=(2, 2))
x = identity_block_atrous(x, 3, [256, 256, 1024], stage=4, block='f', atrous_rate=(2, 2))
# conv_5
x = conv_block_atrous(x, 3, [512, 512, 2048], stage=5, block='a', atrous_rate=(4, 4))
x = identity_block_atrous(x, 3, [512, 512, 2048], stage=5, block='b', atrous_rate=(4, 4))
x = identity_block_atrous(x, 3, [512, 512, 2048], stage=5, block='c', atrous_rate=(4, 4))
# Create model
model = Model(img_input, x)
# Load weights
weights_path = get_file(
'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
WEIGHTS_PATH_NO_TOP,
cache_subdir='models',
md5_hash='a268eb855778b3df3c7506639542a6af')
model.load_weights(weights_path)
return model
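# --- Usage sketch (illustrative) ---
# The dilated ResNet-50 encoder has output stride 8 (conv_4/conv_5 use atrous
# convolutions instead of striding), so a 480x640 input yields a 60x80x2048
# feature map.
#
#   encoder = dcn_resnet()
#   features = encoder.output  # feed into ASPP / decoder blocks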

@@ -0,0 +1,241 @@
from __future__ import division
from keras.engine.base_layer import Layer
from keras import backend as K
from keras import activations
from keras import initializers
from keras import regularizers
from keras import constraints
import numpy as np
import tensorflow as tf
def gaussian_priors_init(shape, name=None, dtype=None):
means = np.random.uniform(low=0.3, high=0.7, size=shape[0] // 2)
covars = np.random.uniform(low=0.05, high=0.3, size=shape[0] // 2)
return K.variable(np.concatenate((means, covars), axis=0), name=name)
class LearningPrior(Layer):
def __init__(self, nb_gaussian, init=None, weights=None,
W_regularizer=None, activity_regularizer=None,
W_constraint=None, **kwargs):
self.nb_gaussian = nb_gaussian
if not init:
self.init = tf.initializers.random_uniform() #replaced from gaussian_priors_init
else:
self.init = initializers.get(init)
self.W_regularizer = regularizers.get(W_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.W_constraint = constraints.get(W_constraint)
self.initial_weights = weights
super(LearningPrior, self).__init__(**kwargs)
def build(self, input_shape):
self.W_shape = (self.nb_gaussian*4, )
self.W = self.add_weight(shape=self.W_shape,
initializer= self.init,
name='{}_W'.format(self.name),
regularizer=self.W_regularizer,
constraint=self.W_constraint )
# Possibly unnecessary
self.regularizers = []
if self.W_regularizer:
self.W_regularizer.set_param(self.W)
self.regularizers.append(self.W_regularizer)
# Possibly unnecessary
self.constraints = {}
if self.W_constraint:
self.constraints[self.W] = self.W_constraint
# Possibly unnecessary
if self.activity_regularizer:
self.activity_regularizer.set_layer(self)
self.regularizers.append(self.activity_regularizer)
# Not changed because same syntax in Keras 2
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
super(LearningPrior, self).build(input_shape)
def compute_output_shape(self, input_shape):
return (input_shape[0], input_shape[1], input_shape[2], self.nb_gaussian)
def call(self, x):
mu_x = self.W[:self.nb_gaussian]
mu_y = self.W[self.nb_gaussian:self.nb_gaussian*2]
sigma_x = self.W[self.nb_gaussian*2:self.nb_gaussian*3]
sigma_y = self.W[self.nb_gaussian*3:]
self.b_s = x.shape[0].value
self.height = x.shape[1].value
self.width = x.shape[2].value
e = self.height / self.width
e1 = (1 - e) / 2
e2 = e1 + e
mu_x = K.clip(mu_x, 0.25, 0.75)
mu_y = K.clip(mu_y, 0.35, 0.65)
sigma_x = K.clip(sigma_x, 0.1, 0.9)
sigma_y = K.clip(sigma_y, 0.2, 0.8)
x_t = K.dot(K.ones((self.height, 1)), K.expand_dims(self._linspace(0, 1.0, self.width), axis=0))
y_t = K.dot(K.expand_dims(self._linspace(e1, e2, self.height), axis=1), K.ones((1, self.width)))
x_t = K.repeat_elements(K.expand_dims(x_t, axis=-1), self.nb_gaussian, axis=-1)
y_t = K.repeat_elements(K.expand_dims(y_t, axis=-1), self.nb_gaussian, axis=-1)
gaussian = 1 / (2 * np.pi * sigma_x * sigma_y + K.epsilon()) * \
K.exp(-((x_t - mu_x) ** 2 / (2 * sigma_x ** 2 + K.epsilon()) +
(y_t - mu_y) ** 2 / (2 * sigma_y ** 2 + K.epsilon())))
max_gauss = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(gaussian, axis=0), axis=0), axis=0), self.width, axis=0), axis=0), self.height, axis=0)
gaussian = gaussian / max_gauss
output = K.ones_like(K.expand_dims(x[...,0]))*gaussian
return output
@staticmethod
def _linspace(start, stop, num):
lin = np.linspace(start, stop, num)
return tf.convert_to_tensor(lin, dtype='float32')  # avoids shadowing the builtin `range`
def get_config(self):
config = {'nb_gaussian': self.nb_gaussian,
# 'init': self.init.__name__,
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
}
base_config = super(LearningPrior, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
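# --- Usage sketch (illustrative; `feats` is a hypothetical 4D feature tensor) ---
# The layer ignores the input values and only uses the input's spatial
# dimensions to build nb_gaussian learned 2D Gaussian prior maps.
#
#   priors = LearningPrior(nb_gaussian=16, init=gaussian_priors_init)(feats)
#   feats_with_priors = Concatenate()([feats, priors])

# NOTE: OldLearningPrior below is the legacy Keras 1 / Theano version, kept for
# reference only. It references `initializations`, `InputSpec`, `T`
# (theano.tensor) and `floatX`, none of which are imported here, so it is not
# runnable in this TF/Keras 2 environment.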
class OldLearningPrior(Layer):
def __init__(self, nb_gaussian, init='normal', weights=None,
W_regularizer=None, activity_regularizer=None,
W_constraint=None, **kwargs):
self.nb_gaussian = nb_gaussian
self.init = initializations.get(init, dim_ordering='th')
self.W_regularizer = regularizers.get(W_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.W_constraint = constraints.get(W_constraint)
self.input_spec = [InputSpec(ndim=4)]
self.initial_weights = weights
super(OldLearningPrior, self).__init__(**kwargs)
def build(self, input_shape):
self.W_shape = (self.nb_gaussian*4, )
# Might need change
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
# Might need change - to self.add_weight
self.trainable_weights = [self.W]
# Might need change - could be absorbed by add_weight
self.regularizers = []
if self.W_regularizer:
self.W_regularizer.set_param(self.W)
self.regularizers.append(self.W_regularizer)
if self.activity_regularizer:
self.activity_regularizer.set_layer(self)
self.regularizers.append(self.activity_regularizer)
# Might need change
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
# Might need change
self.constraints = {}
if self.W_constraint:
self.constraints[self.W] = self.W_constraint
def get_output_shape_for(self, input_shape):
self.b_s = input_shape[0]
self.height = input_shape[2]
self.width = input_shape[3]
return self.b_s, self.nb_gaussian, self.height, self.width
def call(self, x, mask=None):
mu_x = self.W[:self.nb_gaussian]
mu_y = self.W[self.nb_gaussian:self.nb_gaussian*2]
sigma_x = self.W[self.nb_gaussian*2:self.nb_gaussian*3]
sigma_y = self.W[self.nb_gaussian*3:]
# Needs change
self.b_s = x.shape[0]
self.height = x.shape[2]
self.width = x.shape[3]
e = self.height / self.width
e1 = (1 - e) / 2
e2 = e1 + e
mu_x = K.clip(mu_x, 0.25, 0.75)
mu_y = K.clip(mu_y, 0.35, 0.65)
sigma_x = K.clip(sigma_x, 0.1, 0.9)
sigma_y = K.clip(sigma_y, 0.2, 0.8)
x_t = T.dot(T.ones((self.height, 1)), self._linspace(0, 1.0, self.width).dimshuffle('x', 0))
y_t = T.dot(self._linspace(e1, e2, self.height).dimshuffle(0, 'x'), T.ones((1, self.width)))
x_t = K.repeat_elements(K.expand_dims(x_t, dim=-1), self.nb_gaussian, axis=-1)
y_t = K.repeat_elements(K.expand_dims(y_t, dim=-1), self.nb_gaussian, axis=-1)
gaussian = 1 / (2 * np.pi * sigma_x * sigma_y + K.epsilon()) * \
T.exp(-((x_t - mu_x) ** 2 / (2 * sigma_x ** 2 + K.epsilon()) +
(y_t - mu_y) ** 2 / (2 * sigma_y ** 2 + K.epsilon())))
gaussian = K.permute_dimensions(gaussian, (2, 0, 1))
max_gauss = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(gaussian, axis=1), axis=1)), self.height, axis=-1)), self.width, axis=-1)
gaussian = gaussian / max_gauss
output = K.repeat_elements(K.expand_dims(gaussian, dim=0), self.b_s, axis=0)
return output
@staticmethod
def _linspace(start, stop, num):
# produces results identical to:
# np.linspace(start, stop, num)
start = T.cast(start, floatX)
stop = T.cast(stop, floatX)
num = T.cast(num, floatX)
step = (stop - start) / (num - 1)
return T.arange(num, dtype=floatX) * step + start
def get_config(self):
config = {'nb_gaussian': self.nb_gaussian,
'init': self.init.__name__,
'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
}
base_config = super(OldLearningPrior, self).get_config()
return dict(list(base_config.items()) + list(config.items()))

@@ -0,0 +1,256 @@
import keras.backend as K
import numpy as np
from sal_imp_utilities import *
from tensorflow.keras.losses import KLDivergence
# KL-Divergence Loss
def kl_divergence(y_true, y_pred):
max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
y_pred /= max_y_pred
sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
y_true /= (sum_y_true + K.epsilon())
y_pred /= (sum_y_pred + K.epsilon())
# This constant was defined by Cornia et al. and is a bit arbitrary
return K.sum(K.sum(y_true * K.log((y_true / (y_pred + K.epsilon())) + K.epsilon()), axis=1), axis=1)
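# Illustrative (not part of the original code): these losses are plain
# Keras-backend functions, so they plug directly into model.compile, e.g.
#
#   model.compile(optimizer='adam',
#                 loss=kl_divergence,
#                 metrics=[correlation_coefficient])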
def kl_time(y_true, y_pred):
if len(y_true.shape) == 5:
ax = 2
else:
ax = 1
max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=ax), axis=ax), axis=ax),
shape_r_out, axis=ax), axis=ax+1), shape_c_out, axis=ax+1)
y_pred /= max_y_pred
sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=ax), axis=ax), axis=ax),
shape_r_out, axis=ax), axis=ax+1), shape_c_out, axis=ax+1)
sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=ax), axis=ax), axis=ax),
shape_r_out, axis=ax), axis=ax+1), shape_c_out, axis=ax+1)
y_true /= (sum_y_true + K.epsilon())
y_pred /= (sum_y_pred + K.epsilon())
kl_out = K.sum(K.sum(y_true * K.log((y_true / (y_pred + K.epsilon())) + K.epsilon()), axis=ax), axis=ax)
if len(y_true.shape) == 5:
kl_out = K.mean(kl_out, axis = 1)
return kl_out
# Correlation Coefficient Loss
def correlation_coefficient(y_true, y_pred):
max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
y_pred /= max_y_pred
sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
y_true /= (sum_y_true + K.epsilon())
y_pred /= (sum_y_pred + K.epsilon())
N = shape_r_out * shape_c_out
sum_prod = K.sum(K.sum(y_true * y_pred, axis=1), axis=1)
sum_x = K.sum(K.sum(y_true, axis=1), axis=1)
sum_y = K.sum(K.sum(y_pred, axis=1), axis=1)
sum_x_square = K.sum(K.sum(K.square(y_true), axis=1), axis=1)
sum_y_square = K.sum(K.sum(K.square(y_pred), axis=1), axis=1)
num = sum_prod - ((sum_x * sum_y) / N)
den = K.sqrt((sum_x_square - K.square(sum_x) / N) * (sum_y_square - K.square(sum_y) / N))
return num / den
def cc_time(y_true, y_pred):
if len(y_true.shape) == 5:
ax = 2
else:
ax = 1
max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=ax), axis=ax), axis=ax),
shape_r_out, axis=ax), axis=ax+1), shape_c_out, axis=ax+1)
y_pred /= max_y_pred
sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=ax), axis=ax), axis=ax),
shape_r_out, axis=ax), axis=ax+1), shape_c_out, axis=ax+1)
sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=ax), axis=ax), axis=ax),
shape_r_out, axis=ax), axis=ax+1), shape_c_out, axis=ax+1)
y_true /= (sum_y_true + K.epsilon())
y_pred /= (sum_y_pred + K.epsilon())
N = shape_r_out * shape_c_out
sum_prod = K.sum(K.sum(y_true * y_pred, axis=ax), axis=ax)
sum_x = K.sum(K.sum(y_true, axis=ax), axis=ax)
sum_y = K.sum(K.sum(y_pred, axis=ax), axis=ax)
sum_x_square = K.sum(K.sum(K.square(y_true), axis=ax), axis=ax)
sum_y_square = K.sum(K.sum(K.square(y_pred), axis=ax), axis=ax)
num = sum_prod - ((sum_x * sum_y) / N)
den = K.sqrt((sum_x_square - K.square(sum_x) / N) * (sum_y_square - K.square(sum_y) / N))
if len(y_true.shape) == 5:
cc_out = K.mean(num / den, axis = 1)
else:
cc_out = num / den
return cc_out
# Normalized Scanpath Saliency Loss
def nss_time(y_true, y_pred):
if len(y_true.shape) == 5:
ax = 2
else:
ax = 1
maxi = K.max(K.max(y_pred, axis=ax), axis=ax)
first_rep = K.repeat_elements(K.expand_dims(maxi, axis=ax),shape_r_out, axis=ax)
max_y_pred = K.repeat_elements(K.expand_dims(first_rep, axis=ax+1), shape_c_out, axis=ax+1)
y_pred /= max_y_pred
if len(y_true.shape) == 5:
y_pred_flatten = K.reshape(y_pred, (K.shape(y_pred)[0],K.shape(y_pred)[1],K.shape(y_pred)[2]*K.shape(y_pred)[3]*K.shape(y_pred)[4])) #K.batch_flatten(y_pred)
else:
y_pred_flatten = K.batch_flatten(y_pred)
y_mean = K.mean(y_pred_flatten, axis=-1)
y_mean = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_mean)),
shape_r_out, axis=ax)), shape_c_out, axis=ax+1)
y_std = K.std(y_pred_flatten, axis=-1)
y_std = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_std)),
shape_r_out, axis=ax)), shape_c_out, axis=ax+1)
y_pred = (y_pred - y_mean) / (y_std + K.epsilon())
num = K.sum(K.sum(y_true * y_pred, axis=ax), axis=ax)
den = K.sum(K.sum(y_true, axis=ax), axis=ax) + K.epsilon()
if len(y_true.shape) == 5:
nss_out = K.mean(num/den, axis = 1)
else:
nss_out = num/den
return nss_out
def nss(y_true, y_pred):
ax = 1
# NB: in TF1, `==` on a symbolic tensor compares object identity, so this
# guard is never taken; kept from the original for reference.
if K.sum(K.sum(y_true, axis=ax), axis=ax) == 0:
    return 0
max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=ax), axis=ax), axis=ax+1),
shape_r_out, axis=ax), axis=ax+1), shape_c_out, axis=ax+1)
y_pred /= max_y_pred
y_pred_flatten = K.batch_flatten(y_pred)
y_mean = K.mean(y_pred_flatten, axis=-1)
y_mean = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_mean)),
shape_r_out, axis=ax)), shape_c_out, axis=ax+1)
y_std = K.std(y_pred_flatten, axis=-1)
y_std = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_std)),
shape_r_out, axis=ax)), shape_c_out, axis=ax+1)
y_pred = (y_pred - y_mean) / (y_std + K.epsilon())
num = K.sum(K.sum(y_true * y_pred, axis=ax), axis=ax)
den = K.sum(K.sum(y_true, axis=ax), axis=ax) + K.epsilon()
nss_out = num / den
return nss_out
def cc_match(y_true, y_pred):
'''Computes CC between the initial, mid and final timesteps within y_true and within y_pred,
and returns the mean absolute error between the corresponding CCs.
Requires y_true and y_pred to be tensors of shape (bs, t, r, c, 1).'''
mid = 1 # y_true.shape[1].value//2
ccim_true = cc_time(y_true[:,0,...], y_true[:,mid,...])
ccmf_true = cc_time(y_true[:,mid,...], y_true[:,-1,...])
ccim_pred = cc_time(y_pred[:,0,...], y_pred[:,mid,...])
ccmf_pred = cc_time(y_pred[:,mid,...], y_pred[:,-1,...])
return (K.abs(ccim_true-ccim_pred) + K.abs(ccmf_true-ccmf_pred) )/2
def kl_cc_nss_combined(lw=[10,-2,-1]):
# DEPRECATED
'''Loss function that combines kl, cc and nss. Because nss receives a different ground truth
(fixation maps) than kl and cc (saliency maps), this function requires y_true to contain both.
y_true must be a tensor of shape [bs, 2, r, c, 1]; y_pred must have the same shape, so the model
should add a 5th dimension between bs and r and repeat the predicted map twice along that dimension.
'''
def loss(y_true, y_pred):
map_true = y_true[:,0,...]
fix_true = y_true[:,1,...]
pred = y_pred[:,0,...]
k = kl_divergence(map_true, pred)
c = correlation_coefficient(map_true, pred)
n = nss(fix_true, pred)
return lw[0]*k+lw[1]*c+lw[2]*n
return loss
def loss_wrapper(loss, input_shape):
shape_r_out, shape_c_out = input_shape
print("shape r out, shape c out", shape_r_out, shape_c_out)
def _wrapper(y_true, y_pred):
return loss(y_true, y_pred)
return _wrapper
def kl_new(y_true, y_pred):
'''
Loss for the single-duration model. The older kl_divergence() above can produce NaNs during
training; this variant applies the same normalization, then delegates to Keras' KLDivergence.
'''
max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
y_pred /= max_y_pred
sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=1), axis=1), axis=1),
shape_r_out, axis=1), axis=2), shape_c_out, axis=2)
y_true /= (sum_y_true + K.epsilon())
y_pred /= (sum_y_pred + K.epsilon())
kl = KLDivergence()  # imported from tensorflow.keras.losses above
return kl(y_true,y_pred)
def kl_cc_combined(y_true, y_pred):
# For Singleduration
'''Loss function for single-duration training that combines kl and cc. Unlike
kl_cc_nss_combined above, both terms share the same ground truth (saliency maps), so
y_true and y_pred are ordinary (bs, r, c, 1) tensors and no fixation map is needed.
Returns 10*kl - 3*cc.
'''
#k = kl_time(y_true, y_pred)
k = kl_new(y_true, y_pred)
print('k=',k)
#c = cc_time(y_true, y_pred)
c = correlation_coefficient(y_true, y_pred)
print('c=', c)
return 10*k-3*c
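# Illustrative single-duration setup (assumes a model with one saliency output
# of shape (bs, shape_r_out, shape_c_out, 1)):
#
#   model.compile(optimizer='adam', loss=kl_cc_combined)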

@@ -0,0 +1,944 @@
import numpy as np
import keras
import matplotlib.pyplot as plt
import sys
import os
from keras.layers import Input, TimeDistributed, Lambda, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
import keras.backend as K
from keras.models import Model
import tensorflow as tf
from keras.utils import Sequence
from keras.optimizers import Adam, RMSprop, SGD
import cv2
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from PIL import Image
from IPython.display import clear_output
import scipy.io
import scipy.ndimage  # postprocess_predictions uses scipy.ndimage.filters.gaussian_filter
from imgaug import augmenters as iaa  # augmentation pipelines in the generators below
from copy import deepcopy
import re
# DEBUG
DEBUG = False
# dataset-specific resolutions
cat2000_c = 1920
cat2000_r = 1080
#cat2000_r_out = 1088 # this is divisible by 16
cat2000_r_out = 1104 # divisible by 48
cat2000_c_out = cat2000_c # already divisible by 16
cc_c = 300
cc_r = 225
cc_c_out = 1776
cc_r_out = 1344
# number of rows of input images
#shape_r = int(cat2000_r/6)
shape_r = 240
#shape_r = cc_r
# number of cols of input images
#shape_c = int(cat2000_c/6)
shape_c = 320
#shape_c = cc_c
# number of rows of downsampled maps
shape_r_gt = 30
# number of cols of downsampled maps
shape_c_gt = 40
# number of rows of model outputs
#shape_r_out = cat2000_r_out
shape_r_out = 480
#shape_r_out = cc_r_out
# number of cols of model outputs
#shape_c_out = cat2000_c_out
shape_c_out = 640
#shape_c_out = cc_c_out
# final upsampling factor
upsampling_factor = 16
# number of epochs
nb_epoch = 50
# number of timesteps
nb_timestep = 3
# number of learned priors
nb_gaussian = 16
def repeat(x):
return K.repeat_elements(K.expand_dims(x,axis=1), nb_timestep, axis=1)
# return K.reshape(K.repeat(K.batch_flatten(x), nb_timestep), (1, nb_timestep, shape_r_gt, shape_c_gt, 512))
def repeat_shape(s):
return (s[0], nb_timestep) + s[1:]
def padding(img, shape_r, shape_c, channels=3):
img_padded = np.zeros((shape_r, shape_c, channels), dtype=np.uint8)
if channels == 1:
img_padded = np.zeros((shape_r, shape_c), dtype=np.uint8)
original_shape = img.shape
rows_rate = original_shape[0]/shape_r
cols_rate = original_shape[1]/shape_c
if rows_rate > cols_rate:
new_cols = (original_shape[1] * shape_r) // original_shape[0]
img = cv2.resize(img, (new_cols, shape_r))
if new_cols > shape_c:
new_cols = shape_c
img_padded[:, ((img_padded.shape[1] - new_cols) // 2):((img_padded.shape[1] - new_cols) // 2 + new_cols)] = img
else:
new_rows = (original_shape[0] * shape_c) // original_shape[1]
img = cv2.resize(img, (shape_c, new_rows))
if new_rows > shape_r:
new_rows = shape_r
img_padded[((img_padded.shape[0] - new_rows) // 2):((img_padded.shape[0] - new_rows) // 2 + new_rows), :] = img
return img_padded
def resize_fixation(img, rows=480, cols=640):
out = np.zeros((rows, cols))
factor_scale_r = rows / img.shape[0]
factor_scale_c = cols / img.shape[1]
coords = np.argwhere(img)
for coord in coords:
r = int(np.round(coord[0]*factor_scale_r))
c = int(np.round(coord[1]*factor_scale_c))
if r == rows:
r -= 1
if c == cols:
c -= 1
out[r, c] = 1
return out
def padding_fixation(img, shape_r, shape_c):
img_padded = np.zeros((shape_r, shape_c))
original_shape = img.shape
rows_rate = original_shape[0]/shape_r
cols_rate = original_shape[1]/shape_c
if rows_rate > cols_rate:
new_cols = (original_shape[1] * shape_r) // original_shape[0]
img = resize_fixation(img, rows=shape_r, cols=new_cols)
if new_cols > shape_c:
new_cols = shape_c
img_padded[:, ((img_padded.shape[1] - new_cols) // 2):((img_padded.shape[1] - new_cols) // 2 + new_cols)] = img
else:
new_rows = (original_shape[0] * shape_c) // original_shape[1]
img = resize_fixation(img, rows=new_rows, cols=shape_c)
if new_rows > shape_r:
new_rows = shape_r
img_padded[((img_padded.shape[0] - new_rows) // 2):((img_padded.shape[0] - new_rows) // 2 + new_rows), :] = img
return img_padded
def preprocess_fixmaps(paths, shape_r, shape_c, fix_as_mat=False, fix_key="", pad=True):
if pad:
ims = np.zeros((len(paths), shape_r, shape_c, 1))
else:
ims = []
# print('ims.shape:',ims.shape)
for i, path in enumerate(paths):
if path == 'dummy':
fix_map = np.zeros((480,640))
elif fix_as_mat:
mat = scipy.io.loadmat(path)
if DEBUG:
print('mat',mat)
fix_map = mat[fix_key]
else:
fix_map = cv2.imread(path, 0)
if DEBUG:
print('fix_map shape, np.max(fix_map),np.min(fix_map),np.mean(fix_map)',fix_map.shape,np.max(fix_map),np.min(fix_map),np.mean(fix_map))
if pad:
ims[i, :, :, 0] = padding_fixation(fix_map, shape_r=shape_r, shape_c=shape_c)
else:
ims.append(fix_map)
# ims = np.array(ims)
# print('ims[-1].shape:',ims[-1].shape)
return ims
def load_maps(paths):
ims = []
for i, path in enumerate(paths):
original_map = np.load(path, allow_pickle=True)
# TODO: check whether division by 255.0 is needed here
ims.append(original_map.astype(np.float32))
ims = np.array(ims)
# print('load_maps: ims[-1].shape',ims[-1].shape)
return ims
def preprocess_maps(paths, shape_r, shape_c, pad=True):
if pad:
ims = np.zeros((len(paths), shape_r, shape_c, 1))
else:
ims = []
for i, path in enumerate(paths):
original_map = cv2.imread(path, 0)
if pad:
padded_map = padding(original_map, shape_r, shape_c, 1)
ims[i,:,:, 0] = padded_map.astype(np.float32)
ims[i,:,:, 0] /= 255.0
else:
ims.append(original_map.astype(np.float32)/255.0)
# ims = np.array(ims)
# print('ims.shape in preprocess_maps',ims.shape)
# print('prep_maps: ims[-1].shape',ims[-1].shape)
return ims
def load_images(paths):
ims =[]
for i, path in enumerate(paths):
img = np.load(path, allow_pickle=True)
ims.append(img)
ims = np.array(ims)
# print('load_images: ims.shape',np.array(ims).shape)
return ims
def preprocess_images(paths, shape_r, shape_c, pad=True):
if pad:
ims = np.zeros((len(paths), shape_r, shape_c, 3))
else:
ims =[]
for i, path in enumerate(paths):
original_image = cv2.imread(path)
if original_image is None:
raise ValueError('Path unreadable: %s' % path)
if pad:
padded_image = padding(original_image, shape_r, shape_c, 3)
ims[i] = padded_image
else:
original_image = original_image.astype(np.float32)
original_image[..., 0] -= 103.939
original_image[..., 1] -= 116.779
original_image[..., 2] -= 123.68
ims.append(original_image)
# ims = np.array(ims)
if pad: print('ims.shape in preprocess_imgs', ims.shape)  # ims is a plain list when pad=False
# DEBUG
# plt.figure()
# plt.subplot(1,2,1)
# plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
# plt.subplot(1,2,2)
# plt.imshow(cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB))
# plt.suptitle(path)
if pad:
ims[:, :, :, 0] -= 103.939
ims[:, :, :, 1] -= 116.779
ims[:, :, :, 2] -= 123.68
return ims
def reverse_preprocess(img):
im = deepcopy(img)
im[:, :, 0] += 103.939
im[:, :, 1] += 116.779
im[:, :, 2] += 123.68
# print(np.max(im), np.min(im), type(im[0][0][0]))
im = im[...,::-1]
im = np.array(im, dtype=np.uint8)
return im
def postprocess_predictions(pred, shape_r, shape_c, blur=False, normalize=False, zero_to_255 = False):
predictions_shape = pred.shape
rows_rate = shape_r / predictions_shape[0]
cols_rate = shape_c / predictions_shape[1]
# pred = pred / np.max(pred) * 255
# print('Preparing to resize...')
if blur:
sigma=blur
pred = scipy.ndimage.filters.gaussian_filter(pred, sigma=sigma)
if rows_rate > cols_rate:
new_cols = (predictions_shape[1] * shape_r) // predictions_shape[0]
pred = cv2.resize(pred, (new_cols, shape_r))
img = pred[:, ((pred.shape[1] - shape_c) // 2):((pred.shape[1] - shape_c) // 2 + shape_c)]
else:
new_rows = (predictions_shape[0] * shape_c) // predictions_shape[1]
pred = cv2.resize(pred, (shape_c, new_rows))
img = pred[((pred.shape[0] - shape_r) // 2):((pred.shape[0] - shape_r) // 2 + shape_r), :]
# print('Resized')
if normalize:
img = img / np.max(img) * 255
if zero_to_255:
img = np.abs(img - 255)
return img
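# --- Usage sketch (illustrative; orig_rows/orig_cols are the source image size) ---
#
#   pred = model.predict(batch_imgs)[0][0, :, :, 0]   # (shape_r_out, shape_c_out)
#   heat = postprocess_predictions(pred, orig_rows, orig_cols,
#                                  blur=5, normalize=True)
#   cv2.imwrite('heatmap.png', heat.astype(np.uint8))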
class MultidurationGenerator(Sequence):
def __init__(self,
img_filenames,
map_filenames=None,
fix_filenames=None,
batch_size=1,
img_size=(shape_r,shape_c),
map_size=(shape_r_out, shape_c_out),
shuffle=True,
augment=False,
n_output_maps=1,
n_output_fixs=1,
mode = 'multistream_concat',
fix_key='',
return_names=False,
fix_as_mat=False,
pad_gt_maps=True,
read_npy=False
):
'''
Generator for multi-duration saliency data. Receives a list of images and t lists of heatmaps and
fixations, where t is the number of saliency timesteps to yield; t is inferred from the length of
map_filenames. The generator has 3 modes:
1. multistream_concat: concatenates the map and fixations for each timestep into one tensor of shape
(bs, 2, r, c, 1), collects these tensors in a list of length t, and yields that list as y_true.
Meant for losses that recover the map and fixations by slicing the y_true tensor internally.
2. multistream_full: does not concatenate maps and fixations; every map and fixation tensor needed
for each timestep becomes a separate element of the output list. For example, with 3 losses and 2
timesteps the generator yields a y_true list of length 6: 3 maps/fixs for timestep 1 and 3 for
timestep 2.
3. singlestream: concatenates all timesteps into one tensor. For each loss the generator yields a
tensor of shape (bs, time, r, c, 1); with kl, cc and nss, for example, it outputs a list of length 3
of such tensors. Use with losses adapted to tensors with a time dimension.
'''
print('Instantiating MultidurationGenerator. \
Number of files received: %d. Batch size: %d. \
Image size: %s. Augmentation: %d. Mode: %s' \
% (len(img_filenames), batch_size, str(img_size), augment,mode ))
if (mode == 'multistream_concat') and (map_filenames is None or fix_filenames is None):
print('Multistream concat can only be used when both fixations and maps are provided. \
If only one is enough, use `multistream_full`.')
self.n_output_maps = n_output_maps
self.n_output_fixs = n_output_fixs
self.fix_as_mat = fix_as_mat
self.fix_key = fix_key
self.pad_gt_maps = pad_gt_maps
self.img_filenames = np.array(img_filenames)
self.read_npy = read_npy
# check that maps make sense
if map_filenames is not None:
self.map_filenames = np.array(map_filenames)
assert all([len(self.img_filenames) == len(elt) for elt in self.map_filenames]), "Mismatch between images and maps. Images size: " + self.img_filenames.shape.__str__() + " Maps size: " + self.map_filenames.shape.__str__()
self.timesteps = len(map_filenames)
else:
self.n_output_maps = 0
self.map_filenames = None
print('Warning: No maps filenames provided, no outputs of that kind will be generated')
# check that fixs make sense
if fix_filenames is not None:
self.fix_filenames = np.array(fix_filenames)
assert all([len(self.img_filenames) == len(elt) for elt in self.fix_filenames]), "Mismatch between images and fixations. Images size: " + self.img_filenames.shape.__str__() + " Fix size: " + self.fix_filenames.shape.__str__()
self.timesteps = len(fix_filenames)
else:
self.n_output_fixs = 0
self.fix_filenames = None
print('Warning: No fix filenames provided, no outputs of that kind will be generated')
self.batch_size = batch_size
self.img_size = img_size
self.map_size = map_size
self.shuffle = shuffle
self.augment = augment
self.mode = mode
self.return_names = return_names
# Defining augmentation sequence
if augment:
sometimes = lambda aug: iaa.Sometimes(0.4, aug)
self.seq = iaa.Sequential([
sometimes(iaa.CropAndPad(px=(0, 20))), # pad each side by 0 to 20px (randomly chosen)
iaa.Fliplr(0.5), # horizontally flip 50% of the images
sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
sometimes(iaa.Affine(rotate=(-15, 15)))
], random_order=True)
if shuffle:
self.on_epoch_end()
def __len__(self):
return int(np.ceil(len(self.img_filenames) / float(self.batch_size)))
def __getitem__(self, idx):
# Get input images
batch_imgs = self.img_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
if self.read_npy:
images = load_images(batch_imgs)
else:
images = preprocess_images(batch_imgs, self.img_size[0], self.img_size[1])
# Get ground truth maps for all times
if self.n_output_maps>=1:
maps = []
for t in range(self.timesteps):
maps_names_t = self.map_filenames[t][idx * self.batch_size : (idx + 1) * self.batch_size]
if self.read_npy:
maps_t = load_maps(maps_names_t)
else:
maps_t = preprocess_maps(maps_names_t, self.map_size[0], self.map_size[1], pad=self.pad_gt_maps)
maps.append(maps_t)
# Get fix maps for all times
if self.n_output_fixs>=1:
fixs = []
for t in range(self.timesteps):
fix_names_t = self.fix_filenames[t][idx * self.batch_size : (idx + 1) * self.batch_size]
if self.read_npy:
fix_t = load_images(fix_names_t)
else:
fix_t = preprocess_fixmaps(fix_names_t, self.map_size[0], self.map_size[1], fix_as_mat=self.fix_as_mat, fix_key=self.fix_key, pad=self.pad_gt_maps)
fixs.append(fix_t)
if self.augment:
seq_det = self.seq.to_deterministic()
images = seq_det.augment_images(images)
for ta in range(self.timesteps):  # `maps` is undefined when n_output_maps == 0
if self.n_output_maps>=1:
maps[ta] = seq_det.augment_heatmaps(maps[ta])
if self.n_output_fixs>=1:
fixs[ta] = seq_det.augment_heatmaps(fixs[ta])
if self.mode == 'singlestream':
# Returns a list of n_output_maps+n_output_fixs elements. Each element is a 5D tensor: (bs, timesteps, r, c, 1)
outs = []
if self.n_output_maps>=1:
maps_with_time = np.zeros((len(batch_imgs),self.timesteps,self.map_size[0],self.map_size[1],1))
for i in range(self.timesteps):
maps_with_time[:,i,...] = maps[i]
# new version of block above that handles images of varying size
# maps_with_time = []
# for bidx in range(self.batch_size):
# # maps_with_time is list of len batch_size with 3D tensors of shape t,w,h
# maps_with_time.append( [maps[ti][bidx] for ti in range(self.timesteps)] )
outs.extend([maps_with_time]*self.n_output_maps)
if self.n_output_fixs>=1:
fixs_with_time = np.zeros((len(batch_imgs),self.timesteps,self.map_size[0],self.map_size[1],1))
for i in range(self.timesteps):
fixs_with_time[:,i,...] = fixs[i]
# new version of block above that handles images of varying size
# fixs_with_time = []
# for bidx in range(self.batch_size):
# # fixs_with_time is list of len batch_size with 3D tensors of shape t,w,h
# fixs_with_time.append( np.array([fixs[ti][bidx] for ti in range(self.timesteps)]) )
outs.extend([fixs_with_time]*self.n_output_fixs)
elif self.mode == 'multistream_concat':
# returns a list of t elements: [ [maps_t1,fix_t1], [maps_t2,fix_t2] , [maps_t3,fix_t3], ...]
outs=[]
for i in range(self.timesteps):
outs.append(np.concatenate([np.expand_dims(maps[i],axis=1),np.expand_dims(fixs[i],axis=1)], axis=1))
# print('len(outs) multistream concat:',len(outs))
elif self.mode == 'multistream_full':
# returns a list of size timestep*losses. If 2 losses maps, 1 loss fix, 2 timesteps, we have: [m1, m1, m2, m2, fix1, fix2]
outs = []
if self.n_output_maps >= 1:
for i in range(self.timesteps):
outs.extend([maps[i]]*self.n_output_maps)
if self.n_output_fixs >= 1:
for i in range(self.timesteps):
outs.extend([fixs[i]]*self.n_output_fixs)
if self.return_names:
return images, outs, batch_imgs
return images, outs
def on_epoch_end(self):
    if self.shuffle:
        idxs = list(range(len(self.img_filenames)))
        np.random.shuffle(idxs)
        self.img_filenames = self.img_filenames[idxs]
        if self.map_filenames is not None:
            for i in range(len(self.map_filenames)):
                self.map_filenames[i] = self.map_filenames[i][idxs]
        if self.fix_filenames is not None:
            for i in range(len(self.fix_filenames)):
                self.fix_filenames[i] = self.fix_filenames[i][idxs]
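# --- Usage sketch (illustrative file lists) ---
#
#   gen = MultidurationGenerator(img_filenames=train_imgs,
#                                map_filenames=[maps_t1, maps_t2, maps_t3],
#                                fix_filenames=[fixs_t1, fixs_t2, fixs_t3],
#                                batch_size=8, mode='singlestream')
#   model.fit_generator(gen, epochs=nb_epoch)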
class SalImpGenerator(Sequence):
def __init__(
self,
img_filenames,
imp_filenames,
fix_filenames=None,
batch_size=1,
img_size=(shape_r,shape_c),
map_size=(shape_r_out, shape_c_out),
shuffle=True,
augment=False,
n_output_maps=1,
concat_fix_and_maps=True,
fix_as_mat=False,
fix_key="",
pad_maps=True,
pad_imgs=True,
read_npy=False,
return_names=False):
print('Instantiating SalImpGenerator. Number of files received: %d. Batch size: %d. Image size: %s. Map size: %s. Augmentation: %d, Pad_imgs: %s. Pad_maps: %s.' %
(len(img_filenames), batch_size, str(img_size), str(map_size), augment, pad_imgs, pad_maps ))
self.img_filenames = np.array(img_filenames)
self.imp_filenames = np.array(imp_filenames)
self.batch_size = batch_size
self.img_size = img_size
self.map_size = map_size
self.shuffle = shuffle
self.augment = augment
self.n_output_maps = n_output_maps
self.concat_fix_and_maps = concat_fix_and_maps
self.fix_as_mat=fix_as_mat
self.fix_key = fix_key
self.pad_imgs = pad_imgs
self.pad_maps = pad_maps
self.return_names=return_names
self.read_npy = read_npy
if fix_filenames is not None:
self.fix_filenames = np.array(fix_filenames)
else:
self.fix_filenames = None
if augment:
sometimes = lambda aug: iaa.Sometimes(0.4, aug)
self.seq = iaa.Sequential([
sometimes(iaa.CropAndPad(px=(0, 20))), # pad each side by 0 to 20px (randomly chosen)
iaa.Fliplr(0.5), # horizontally flip 50% of the images
sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
sometimes(iaa.Affine(rotate=(-15, 15)))
], random_order=True)
if shuffle:
self.on_epoch_end()
def __len__(self):
return int(np.ceil(len(self.img_filenames) / float(self.batch_size)))
def __getitem__(self, idx):
batch_x = self.img_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
batch_y = self.imp_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
# print('img names in this batch:', batch_x)
# print('imp names in this batch:', batch_y)
if self.read_npy:
images = load_images(batch_x)
maps = load_maps(batch_y)
else:
images = preprocess_images(batch_x, self.img_size[0], self.img_size[1], pad =self.pad_imgs)
maps = preprocess_maps(batch_y, self.map_size[0], self.map_size[1], pad =self.pad_maps)
if self.fix_filenames is not None:
if self.read_npy:
fixs = load_images(self.fix_filenames[idx * self.batch_size : (idx + 1) * self.batch_size])
else:
fixs = preprocess_fixmaps(
self.fix_filenames[idx * self.batch_size : (idx + 1) * self.batch_size],
self.map_size[0],
self.map_size[1],
fix_as_mat=self.fix_as_mat,
fix_key=self.fix_key)
if self.augment:
seq_det = self.seq.to_deterministic()
images = seq_det.augment_images(images)
maps = seq_det.augment_heatmaps(maps)
if self.fix_filenames is not None:
fixs = seq_det.augment_heatmaps(fixs)
if self.fix_filenames is not None and self.concat_fix_and_maps:
outs = np.concatenate([np.expand_dims(maps,axis=1),np.expand_dims(fixs,axis=1)], axis=1)
if self.n_output_maps >1:
outs = [outs]*self.n_output_maps
else:
if self.n_output_maps ==1:
if self.fix_filenames is not None:
outs=[maps,fixs]
else:
outs=maps
else:
outs = [maps]*self.n_output_maps
if self.fix_filenames is not None:
outs.append(fixs)
# print('generator: len(outs) should be 3:', len(outs))
# print('generator: outs[0].shape (should be bs,2,r,c,1):', outs[0].shape)
# print('generator: outs[0][0][0].shape (should be first map of batch)',outs[0][0][0].shape)
if self.return_names:
return images, outs, batch_x
return images, outs
def on_epoch_end(self):
if self.shuffle:
idxs = list(range(len(self.img_filenames)))
np.random.shuffle(idxs)
self.img_filenames = self.img_filenames[idxs]
self.imp_filenames = self.imp_filenames[idxs]
if self.fix_filenames is not None:
self.fix_filenames = self.fix_filenames[idxs]
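# Usage sketch (hypothetical paths; preprocessing helpers come from
# sal_imp_utilities): SalImpGenerator yields (images, maps) batches that can be
# fed straight to keras fit_generator.
#
#   import glob
#   img_files = sorted(glob.glob('data/imgs/*.jpg'))
#   imp_files = sorted(glob.glob('data/maps/*.png'))
#   gen = SalImpGenerator(img_files, imp_files, batch_size=8, augment=True)
#   model.fit_generator(gen, epochs=10)  # model defined elsewhere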
def eval_generator(
img_filenames,
map_filenames,
fixmap_filenames,
fixcoord_filenames,
inp_size,
fix_as_mat=False,
fix_key="",
fixcoord_filetype='mat',
):
"""
Returns tuples img, heatmap, fixmap, fix_coords to be used for data eval
img_filenames, map_filesnames, fixmap_filenames should a length-n list where
n is the number of timestamps
heatmap, fixmap, fixcoords are all also length-n
"""
assert len(map_filenames) == len(fixmap_filenames)
n_times = len(map_filenames)
n_img = len(map_filenames[0])
for i in range(n_img):
imgs = []
maps = []
fixmaps = []
fixcoords = []
#img = load_images([img_filenames[i]])
img = preprocess_images([img_filenames[i]], inp_size[0], inp_size[1])
for t in range(n_times):
# load the image
#img = cv2.imread(img_filenames[i])
map_ = cv2.imread(map_filenames[t][i], cv2.IMREAD_GRAYSCALE)
# print("map max min", map_.max(), map_.min())
mapshape = map_.shape
if fix_as_mat:
# fixmap = load_images([fixmap_filenames[t][i]],)
fixmap = preprocess_fixmaps(
[fixmap_filenames[t][i]],
mapshape[0],
mapshape[1],
fix_as_mat=fix_as_mat,
fix_key=fix_key)
fixmap = np.squeeze(fixmap)
else:
fixmap = cv2.imread(fixmap_filenames[t][i], 0)
if fixcoord_filenames:
assert len(fixcoord_filenames) == n_times
if fixcoord_filetype == 'mat':
fixdata = scipy.io.loadmat(fixcoord_filenames[t][i])
resolution = fixdata["resolution"][0]
#assert resolution[0] == img.shape[1] and resolution[1] == img.shape[2]
fix_coords_all_participants = fixdata["gaze"]
all_fixations = []
for participant in fix_coords_all_participants:
all_fixations.extend(participant[0][2])
else:
raise RuntimeError("fixcoord filetype %s is unsupported" % fixcoord_filetype)
else:
all_fixations = None
imgs.append(img)
maps.append(map_)
fixmaps.append(fixmap)
fixcoords.append(all_fixations)
yield (imgs, maps, fixmaps, fixcoords, img_filenames[i])
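# Consumption sketch (hypothetical model and per-timestep filename lists,
# here with two timesteps and no fixation-coordinate files):
#
#   gen = eval_generator(img_files, [maps_t1, maps_t2], [fixs_t1, fixs_t2],
#                        None, inp_size=(shape_r, shape_c))
#   for imgs, maps, fixmaps, fixcoords, fname in gen:
#       preds = model.predict(imgs[0])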
def get_str2label(dataset_path, label_mapping_file=None):
str2label={}
if label_mapping_file:
with open(label_mapping_file, "r") as f:
lines = [l.strip() for l in f.readlines()]
for l in lines:
cl = l.split()[0]
i = l.split()[-1]
str2label[cl] = int(i)
else:
for i,cl in enumerate([d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]):
str2label[cl] = i
return str2label
def get_labels(filenames, str2label=None):
if not str2label:
str2label = get_str2label(dataset_path = '/netpool/homes/wangyo/Dataset/imp1k/imgs', label_mapping_file = "/netpool/homes/wangyo/Dataset/imp1k/imp1k_with_nat_images_label_map.txt")
onehot_arr = np.zeros((len(filenames), len(str2label)))
# print('filenames in get labels',filenames)
for i,f in enumerate(filenames):
split = re.split('/|\\\\',f)
class_name = split[-2]
if split[-4] == 'Salicon':
label = str2label['natural_images']
onehot_arr[i, label] = 1
else:
# print('CLASS NAME IN GET_LABELS', class_name)
label = str2label[class_name]
onehot_arr[i, label] = 1
return onehot_arr
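# Example of the label-mapping file format expected by get_str2label (one
# "<class_name> <index>" pair per line; class names here are illustrative):
#
#   mobile_uis 0
#   natural_images 1
#
#   get_labels(['data/imp1k/imgs/mobile_uis/a.png'],
#              {'mobile_uis': 0, 'natural_images': 1})
#   # -> array([[1., 0.]])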
class ImpAndClassifGenerator(Sequence):
def __init__(
self,
img_filenames,
imp_filenames,
fix_filenames=None,
extra_imgs=None, # For feeding a much larger dataset, e.g. salicon, that the generator will subsample to maintain class balance
extra_imps=None,
extra_fixs=None,
extras_per_epoch=160,
batch_size=1,
img_size=(shape_r,shape_c),
map_size=(shape_r_out, shape_c_out),
shuffle=True,
augment=False,
n_output_maps=1,
concat_fix_and_maps=True,
fix_as_mat=False,
fix_key="",
str2label=None,
dummy_labels=False,
num_classes=6,
pad_imgs=True,
pad_maps=True,
return_names=False,
return_labels=True,
read_npy=False):
print('Instantiating ImpAndClassifGenerator. Number of files received: %d. Extras: %s. Batch size: %d. Image size: %s. Map size: %s. Augmentation: %d, Pad_imgs: %s. Pad_maps: %s. Num classes: %d.' %
(len(img_filenames), len(extra_imgs) if extra_imgs is not None else None, batch_size, str(img_size), str(map_size), augment, pad_imgs, pad_maps, num_classes ))
self.img_filenames = np.array(img_filenames)
self.imp_filenames = np.array(imp_filenames)
self.batch_size = batch_size
self.img_size = img_size
self.map_size = map_size
self.shuffle = shuffle
self.augment = augment
self.n_output_maps = n_output_maps
self.concat_fix_and_maps = concat_fix_and_maps
self.fix_as_mat = fix_as_mat
self.fix_key = fix_key
self.str2label = str2label
self.num_classes = num_classes
self.dummy_labels = dummy_labels
self.pad_imgs = pad_imgs
self.pad_maps = pad_maps
self.extra_idx = 0
self.extra_imgs = np.array(extra_imgs) if extra_imgs is not None else None
self.extra_imps = np.array(extra_imps) if extra_imps is not None else None
self.extra_fixs = np.array(extra_fixs) if extra_fixs is not None else None
self.extras_per_epoch = extras_per_epoch
self.return_names = return_names
self.return_labels=return_labels
self.read_npy=read_npy
if fix_filenames is not None:
self.fix_filenames = np.array(fix_filenames)
else:
self.fix_filenames = None
if augment:
sometimes = lambda aug: iaa.Sometimes(0.4, aug)
self.seq = iaa.Sequential([
sometimes(iaa.CropAndPad(px=(0, 20))), # pad/crop each side by 0 to 20px (randomly chosen)
iaa.Fliplr(0.5), # horizontally flip 50% of the images
sometimes(iaa.CoarseDropout(p=0.1, size_percent=0.05)),
sometimes(iaa.Affine(rotate=(-15, 15)))
], random_order=True)
self.on_epoch_end()
def __len__(self):
return int(np.ceil(len(self.imgs_this_epoch) / float(self.batch_size)))
def __getitem__(self, idx):
batch_x = self.imgs_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size]
batch_y = self.imps_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size]
# print('img names in this batch:', batch_x)
# print('imp names in this batch:', batch_y)
if self.read_npy:
images = load_images(batch_x)
maps = load_maps(batch_y)
else:
images = preprocess_images(batch_x, self.img_size[0], self.img_size[1], pad= self.pad_imgs)
maps = preprocess_maps(batch_y, self.map_size[0], self.map_size[1], pad=self.pad_maps)
if not self.dummy_labels:
labels = get_labels(batch_x, self.str2label) # Returns a numpy array of shape (bs, num_classes)
else:
labels = np.zeros((len(images),self.num_classes))
if self.fix_filenames is not None:
if self.read_npy:
fixs = load_images(self.fixs_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size])
else:
fixs = preprocess_fixmaps(
self.fixs_this_epoch[idx * self.batch_size : (idx + 1) * self.batch_size],
self.map_size[0],
self.map_size[1],
fix_as_mat=self.fix_as_mat,
fix_key=self.fix_key)
if self.augment:
seq_det = self.seq.to_deterministic()
images = seq_det.augment_images(images)
maps = seq_det.augment_heatmaps(maps)
if self.fix_filenames is not None:
fixs = seq_det.augment_heatmaps(fixs)
if self.fix_filenames is not None and self.concat_fix_and_maps:
outs = np.concatenate([np.expand_dims(maps,axis=1),np.expand_dims(fixs,axis=1)], axis=1)
outs = [outs]*self.n_output_maps
if self.return_labels: outs.append(labels)
else:
if self.n_output_maps ==1:
if self.fix_filenames is not None:
outs=[maps,fixs]
if self.return_labels: outs.append(labels)
else:
outs=[maps]
if self.return_labels: outs.append(labels)
else:
outs = [maps]*self.n_output_maps
if self.fix_filenames is not None:
outs.append(fixs)
if self.return_labels: outs.append(labels)
if self.return_names:
outs.append(batch_x)
return images, outs
def on_epoch_end(self):
if self.extra_imgs is not None:
# Sample a new set of extra images
extra_imgs_this_epoch = self.extra_imgs[self.extra_idx * self.extras_per_epoch : (self.extra_idx+1) * self.extras_per_epoch]
extra_imps_this_epoch = self.extra_imps[self.extra_idx * self.extras_per_epoch : (self.extra_idx+1) * self.extras_per_epoch]
if self.extra_fixs is not None:
extra_fixs_this_epoch = self.extra_fixs[self.extra_idx * self.extras_per_epoch : (self.extra_idx+1) * self.extras_per_epoch]
else:
extra_fixs_this_epoch = []
self.extra_idx +=1
else:
extra_imgs_this_epoch = []
extra_imps_this_epoch = []
extra_fixs_this_epoch = []
self.imgs_this_epoch = np.concatenate([self.img_filenames, extra_imgs_this_epoch])
self.imps_this_epoch = np.concatenate([self.imp_filenames, extra_imps_this_epoch])
if self.fix_filenames is not None:
self.fixs_this_epoch = np.concatenate([self.fix_filenames, extra_fixs_this_epoch])
idxs = np.array(range(len(self.imgs_this_epoch)))
if self.shuffle:
np.random.shuffle(idxs)
self.imgs_this_epoch = self.imgs_this_epoch[idxs]
self.imps_this_epoch = self.imps_this_epoch[idxs]
if self.fix_filenames is not None:
self.fixs_this_epoch = self.fixs_this_epoch[idxs]
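# Usage sketch (hypothetical file lists): each epoch mixes the base set with a
# rotating window of `extras_per_epoch` images from a larger extra dataset
# (e.g. Salicon) to keep classes balanced; dummy_labels avoids the hardcoded
# label-mapping file:
#
#   gen = ImpAndClassifGenerator(img_files, imp_files,
#                                extra_imgs=salicon_imgs, extra_imps=salicon_maps,
#                                extras_per_epoch=160, batch_size=8, dummy_labels=True)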
def UMSI_eval_generator(
img_filenames,
map_filenames,
inp_size,
):
"""
Returns tuples img, heatmap to be used for data eval
"""
n_img = len(map_filenames[0])
for i in range(n_img):
imgs = []
maps = []
img = preprocess_images([img_filenames[i]], inp_size[0], inp_size[1])
map_ = cv2.imread(map_filenames[i], cv2.IMREAD_GRAYSCALE)
mapshape = map_.shape
imgs.append(img)
maps.append(map_)
yield (imgs, maps, img_filenames[i])
class RecallNet_Generator(Sequence) :
def __init__(self, image_filenames, labels, mean_accs, type0_accs, batch_size=8) :
self.image_filenames = np.array(image_filenames)
self.labels = np.array(labels)
self.mean_accs = np.array(mean_accs)
self.type0_accs = np.array(type0_accs)
self.batch_size = batch_size
def __len__(self) :
return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))
def __getitem__(self, idx) :
batch_label = self.labels[idx * self.batch_size : (idx+1) * self.batch_size]
batch_img = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size]
batch_img = preprocess_images(batch_img,240,320)
batch_mean = self.mean_accs[idx * self.batch_size : (idx+1) * self.batch_size]
batch_type0 = self.type0_accs[idx * self.batch_size : (idx+1) * self.batch_size]
#out = [batch_mean,batch_type0,batch_label]
out = [batch_mean,batch_type0]
return batch_img, out
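# Usage sketch: the [batch_mean, batch_type0] targets line up with the
# [out_mean_acc, out_type0_acc] heads of the RecallNet models below
# (filenames and accuracy arrays here are hypothetical):
#
#   gen = RecallNet_Generator(img_files, labels, mean_accs, type0_accs, batch_size=8)
#   model.fit_generator(gen, epochs=20)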

822
RecallNet/src/singleduration_models.py Normal file
View file

@ -0,0 +1,822 @@
import numpy as np
import keras
import sys
import os
from keras.layers import Layer, Input, Multiply, Dropout, TimeDistributed, LSTM, Activation, Lambda, Conv2D, Dense, GlobalAveragePooling2D, MaxPooling2D, ZeroPadding2D, UpSampling2D, BatchNormalization, Concatenate, Add, DepthwiseConv2D
import keras.backend as K
from keras.models import Model
import tensorflow as tf
from keras.utils import Sequence
import cv2
import scipy.io
import math
from attentive_convlstm_new import AttentiveConvLSTM2D
from dcn_resnet_new import dcn_resnet
from gaussian_prior_new import LearningPrior
from sal_imp_utilities import *
from xception_custom import Xception_wrapper
#from keras.applications import keras_modules_injection
from keras.regularizers import l2
def decoder_block(x, dil_rate=(2,2), print_shapes=True, dec_filt=1024):
# Dilated convolutions
x = Conv2D(dec_filt, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Conv2D(dec_filt, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = UpSampling2D((2,2), interpolation='bilinear')(x)
x = Conv2D(dec_filt//2, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Conv2D(dec_filt//2, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = UpSampling2D((2,2), interpolation='bilinear')(x)
x = Conv2D(dec_filt//4, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Conv2D(dec_filt//4, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = UpSampling2D((4,4), interpolation='bilinear')(x)
if print_shapes: print('Shape after last ups:',x.shape)
# Final conv to get to a heatmap
x = Conv2D(1, kernel_size=1, padding='same', activation='relu')(x)
if print_shapes: print('Shape after 1x1 conv:',x.shape)
return x
def decoder_block_simple(x, dil_rate=(2,2), print_shapes=True, dec_filt=1024):
x = Conv2D(dec_filt, 3, padding='same', activation='relu')(x)
x = UpSampling2D((2,2), interpolation='bilinear')(x)
x = Conv2D(dec_filt//2, 3, padding='same', activation='relu')(x)
x = UpSampling2D((2,2), interpolation='bilinear')(x)
x = Conv2D(dec_filt//4, 3, padding='same', activation='relu')(x)
x = UpSampling2D((4,4), interpolation='bilinear')(x)
if print_shapes: print('Shape after last ups:',x.shape)
# Final conv to get to a heatmap
x = Conv2D(1, kernel_size=1, padding='same', activation='relu')(x)
if print_shapes: print('Shape after 1x1 conv:',x.shape)
return x
def decoder_block_dp(x, dil_rate=(2,2), print_shapes=True, dec_filt=1024, dp=0.3):
# Dilated convolutions
x = Conv2D(dec_filt, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Conv2D(dec_filt, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Dropout(dp)(x)
x = UpSampling2D((2,2), interpolation='bilinear')(x)
x = Conv2D(dec_filt//2, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Conv2D(dec_filt//2, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Dropout(dp)(x)
x = UpSampling2D((2,2), interpolation='bilinear')(x)
x = Conv2D(dec_filt//4, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Dropout(dp)(x)
x = UpSampling2D((4,4), interpolation='bilinear')(x)
x = Conv2D(dec_filt//4, 3, padding='same', activation='relu', dilation_rate=dil_rate)(x)
x = Dropout(dp)(x)
# NOTE: this second (4,4) upsampling brings the total upsampling to 64x here,
# vs 16x in decoder_block / decoder_block_simple
x = UpSampling2D((4,4), interpolation='bilinear')(x)
if print_shapes: print('Shape after last ups:',x.shape)
# Final conv to get to a heatmap
x = Conv2D(1, kernel_size=1, padding='same', activation='relu')(x)
if print_shapes: print('Shape after 1x1 conv:',x.shape)
return x
######### ENCODER DECODER MODELS #############
def xception_decoder(input_shape = (shape_r, shape_c, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
dil_rate = (2,2)):
inp = Input(shape=input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception:',xception.output.shape)
## DECODER ##
outs_dec = decoder_block(xception.output, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=512)
outs_final = [outs_dec]*n_outs
# Building model
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
def resnet_decoder(input_shape = (shape_r, shape_c, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
dil_rate = (2,2)):
inp = Input(shape=input_shape)
### ENCODER ###
dcn = dcn_resnet(input_tensor=inp)
if print_shapes: print('resnet output shape:',dcn.output.shape)
## DECODER ##
outs_dec = decoder_block(dcn.output, dil_rate=dil_rate, print_shapes=print_shapes, dec_filt=512)
outs_final = [outs_dec]*n_outs
# Building model
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
def fcn_vgg16(input_shape=(shape_r, shape_c, 3),
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
dil_rate=(2,2),
freeze_enc=False,
freeze_cl=True,
internal_filts=256,
num_classes=4,
dp=0.3,
weight_decay=0.,
batch_shape=None):
if batch_shape:
img_input = Input(batch_shape=batch_shape)
image_size = batch_shape[1:3]
else:
img_input = Input(shape=input_shape)
image_size = input_shape[0:2]
# Block 1
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1', kernel_regularizer=l2(weight_decay))(img_input)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2', kernel_regularizer=l2(weight_decay))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
# Block 2
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1', kernel_regularizer=l2(weight_decay))(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2', kernel_regularizer=l2(weight_decay))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
# Block 3
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1', kernel_regularizer=l2(weight_decay))(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2', kernel_regularizer=l2(weight_decay))(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3', kernel_regularizer=l2(weight_decay))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
# Block 4
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1', kernel_regularizer=l2(weight_decay))(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2', kernel_regularizer=l2(weight_decay))(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3', kernel_regularizer=l2(weight_decay))(x)
pool4 = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
# Block 5
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1', kernel_regularizer=l2(weight_decay))(pool4)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2', kernel_regularizer=l2(weight_decay))(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3', kernel_regularizer=l2(weight_decay))(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
print("pool5 shape", x.shape)
# Convolutional layers transfered from fully-connected layers
x = Conv2D(4096, (7, 7), activation='relu', padding='same', name='fc1', kernel_regularizer=l2(weight_decay))(x)
x = Dropout(0.5)(x)
x = Conv2D(4096, (1, 1), activation='relu', padding='same', name='fc2', kernel_regularizer=l2(weight_decay))(x)
x = Dropout(0.5)(x)
# classification layer from fc7
classif_layer_fc7 = Conv2D(1, (1, 1), kernel_initializer='he_normal', activation='linear',
padding='valid', strides=(1, 1))(x)
print("classif_layer_fc7 shape", classif_layer_fc7.shape)
# Upsampling fc7 classif layer to sum with pool4 classif layer
classif_layer_fc7_ups = UpSampling2D(size=(2,2), interpolation="bilinear")(classif_layer_fc7)
print("classif_layer_fc7_ups shape", classif_layer_fc7_ups.shape)
# Lambda layer to match the shape of pool4: prepend one row of zeros along the
# height axis of the upsampled fc7 classification layer
def concat_one(fc7):
shape_fc7 = K.shape(fc7)
shape_zeros = (shape_fc7[0], 1, shape_fc7[2], shape_fc7[3])
return K.concatenate([K.zeros(shape=shape_zeros), fc7], axis=1)
classif_layer_fc7_ups = Lambda(concat_one)(classif_layer_fc7_ups)
print("classif_layer_fc7_ups shape after lambda:", classif_layer_fc7_ups.shape)
# Classification layer from pool4
classif_layer_pool4 = Conv2D(1, (1, 1), kernel_initializer='he_normal', activation='linear',
padding='valid', strides=(1, 1))(pool4)
x = Add()([classif_layer_pool4, classif_layer_fc7_ups])
outs_up = UpSampling2D(size=(32, 32), interpolation="bilinear")(x)
outs_final = [outs_up]*n_outs
model = Model(img_input, outs_final)
weights_path = '../../predimportance_shared/models/ckpt/fcn_vgg16/fcn_vgg16_weights_tf_dim_ordering_tf_kernels.h5'
model.load_weights(weights_path, by_name=True)
if verbose:
model.summary()
return model
############# SAM BASED MODELS ###############
def sam_simple(input_shape = (224, 224, 3), in_conv_filters=512,
verbose=True, print_shapes=True, n_outs=1, ups=8):
'''Simple network that uses an attentive convlstm and a few convolutions.'''
inp = Input(shape=input_shape)
x = Conv2D(filters=in_conv_filters, kernel_size=(3,3), strides=(2, 2), padding='same', data_format=None, dilation_rate=(1,1))(inp)
if print_shapes:
print('after first conv',x.shape)
x = MaxPooling2D(pool_size=(4,4))(x)
if print_shapes:
print('after maxpool',x.shape)
x = Lambda(repeat, repeat_shape)(x)
if print_shapes:
print('after repeat',x.shape)
x = AttentiveConvLSTM2D(filters=512, attentive_filters=512, kernel_size=(3,3),
attentive_kernel_size=(3,3), padding='same', return_sequences=False)(x)
if print_shapes:
print('after ACLSTM',x.shape)
x = UpSampling2D(size=(ups,ups), interpolation='bilinear')(x)
outs_up = Conv2D(filters=1, kernel_size=(3,3), strides=(1, 1), padding='same', data_format=None, dilation_rate=(1,1))(x)
if print_shapes:
print('output shape',outs_up.shape)
outs_final = [outs_up]*n_outs
att_convlstm = Model(inputs=inp, outputs=outs_final)
if verbose:
att_convlstm.summary()
return att_convlstm
def sam_resnet_nopriors(input_shape = (224, 224, 3), conv_filters=128, lstm_filters=512,
att_filters=512, verbose=True, print_shapes=True, n_outs=1, ups=8):
'''Sam ResNet with no priors.'''
inp = Input(shape=input_shape)
dcn = dcn_resnet(input_tensor=inp)
conv_feat = Conv2D(conv_filters, 3, padding='same', activation='relu')(dcn.output)
if print_shapes:
print('Shape after first conv after dcn_resnet:',conv_feat.shape)
# Attentive ConvLSTM
att_convlstm = Lambda(repeat, repeat_shape)(conv_feat)
att_convlstm = AttentiveConvLSTM2D(filters=lstm_filters, attentive_filters=att_filters, kernel_size=(3,3),
attentive_kernel_size=(3,3), padding='same', return_sequences=False)(att_convlstm)
# Dilated convolutions (priors would go here)
dil_conv1 = Conv2D(conv_filters, 5, padding='same', activation='relu', dilation_rate=(4, 4))(att_convlstm)
dil_conv2 = Conv2D(conv_filters, 5, padding='same', activation='relu', dilation_rate=(4, 4))(dil_conv1)
# Final conv to get to a heatmap
outs = Conv2D(1, kernel_size=1, padding='same', activation='relu')(dil_conv2)
if print_shapes:
print('Shape after 1x1 conv:',outs.shape)
# Upsampling back to input shape
outs_up = UpSampling2D(size=(ups,ups), interpolation='bilinear')(outs)
if print_shapes:
print('shape after upsampling',outs_up.shape)
outs_final = [outs_up]*n_outs
# Building model
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
def sam_resnet_new(input_shape = (shape_r, shape_c, 3),
conv_filters=512,
lstm_filters=512,
att_filters=512,
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
nb_gaussian=nb_gaussian):
'''SAM-ResNet ported from the original code.'''
inp = Input(shape=input_shape)
dcn = dcn_resnet(input_tensor=inp)
conv_feat = Conv2D(conv_filters, 3, padding='same', activation='relu')(dcn.output)
if print_shapes:
print('Shape after first conv after dcn_resnet:',conv_feat.shape)
# Attentive ConvLSTM
att_convlstm = Lambda(repeat, repeat_shape)(conv_feat)
att_convlstm = AttentiveConvLSTM2D(filters=lstm_filters,
attentive_filters=att_filters,
kernel_size=(3,3),
attentive_kernel_size=(3,3),
padding='same',
return_sequences=False)(att_convlstm)
# Learned Prior (1)
priors1 = LearningPrior(nb_gaussian=nb_gaussian)(att_convlstm)
concat1 = Concatenate(axis=-1)([att_convlstm, priors1])
dil_conv1 = Conv2D(conv_filters, 5, padding='same', activation='relu', dilation_rate=(4, 4))(concat1)
# Learned Prior (2)
priors2 = LearningPrior(nb_gaussian=nb_gaussian)(att_convlstm)
concat2 = Concatenate(axis=-1)([dil_conv1, priors2])
dil_conv2 = Conv2D(conv_filters, 5, padding='same', activation='relu', dilation_rate=(4, 4))(concat2)
# Final conv to get to a heatmap
outs = Conv2D(1, kernel_size=1, padding='same', activation='relu')(dil_conv2)
if print_shapes:
print('Shape after 1x1 conv:',outs.shape)
# Upsampling back to input shape
outs_up = UpSampling2D(size=(ups,ups), interpolation='bilinear')(outs)
if print_shapes:
print('shape after upsampling',outs_up.shape)
outs_final = [outs_up]*n_outs
# Building model
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
def sam_xception_new(input_shape = (shape_r, shape_c, 3), conv_filters=512, lstm_filters=512, att_filters=512,
verbose=True, print_shapes=True, n_outs=1, ups=8, nb_gaussian=nb_gaussian):
'''SAM with a custom Xception as encoder.'''
inp = Input(shape=input_shape)
# Xception encoder via the module-level Xception_wrapper imported from
# xception_custom (which applies keras_modules_injection)
dcn = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception:',dcn.output.shape)
conv_feat = Conv2D(conv_filters, 3, padding='same', activation='relu')(dcn.output)
if print_shapes:
print('Shape after first conv after dcn_resnet:',conv_feat.shape)
# Attentive ConvLSTM
att_convlstm = Lambda(repeat, repeat_shape)(conv_feat)
att_convlstm = AttentiveConvLSTM2D(filters=lstm_filters, attentive_filters=att_filters, kernel_size=(3,3),
attentive_kernel_size=(3,3), padding='same', return_sequences=False)(att_convlstm)
# Learned Prior (1)
priors1 = LearningPrior(nb_gaussian=nb_gaussian)(att_convlstm)
concat1 = Concatenate(axis=-1)([att_convlstm, priors1])
dil_conv1 = Conv2D(conv_filters, 5, padding='same', activation='relu', dilation_rate=(4, 4))(concat1)
# Learned Prior (2)
priors2 = LearningPrior(nb_gaussian=nb_gaussian)(att_convlstm)
concat2 = Concatenate(axis=-1)([dil_conv1, priors2])
dil_conv2 = Conv2D(conv_filters, 5, padding='same', activation='relu', dilation_rate=(4, 4))(concat2)
# Final conv to get to a heatmap
outs = Conv2D(1, kernel_size=1, padding='same', activation='relu')(dil_conv2)
if print_shapes:
print('Shape after 1x1 conv:',outs.shape)
# Upsampling back to input shape
outs_up = UpSampling2D(size=(ups,ups), interpolation='bilinear')(outs)
if print_shapes:
print('shape after upsampling',outs_up.shape)
outs_final = [outs_up]*n_outs
# Building model
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
def xception_se_lstm_singledur(input_shape = (shape_r, shape_c, 3),
conv_filters=256,
lstm_filters=512,
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
freeze_enc=False,
return_sequences=False):
inp = Input(shape = input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
### LSTM over SE representation ###
x = se_lstm_block(xception.output, nb_timestep, lstm_filters=lstm_filters, return_sequences=return_sequences)
### DECODER ###
outs_dec = decoder_block(x, dil_rate=(2,2), print_shapes=print_shapes, dec_filt=conv_filters)
outs_final = [outs_dec]*n_outs
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
def se_lstm_block(inp, nb_timestep, units=512, print_shapes=True, lstm_filters=512, return_sequences=False):
inp_rep = Lambda(lambda y: K.repeat_elements(K.expand_dims(y, axis=1), nb_timestep, axis=1),
lambda s: (s[0], nb_timestep) + s[1:])(inp)
x = TimeDistributed(GlobalAveragePooling2D())(inp_rep)
if print_shapes: print('shape after AvgPool',x.shape)
x = TimeDistributed(Dense(units, activation='relu'))(x)
if print_shapes: print('shape after first dense',x.shape)
# Normally se block would feed into another fully connected. Instead, we feed it to an LSTM.
x = LSTM(lstm_filters, return_sequences=return_sequences, unroll=True, activation='relu')(x)
if print_shapes: print('shape after lstm',x.shape)
x = Dense(inp.shape[-1].value, activation='sigmoid')(x)
if print_shapes: print('shape after second dense:', x.shape)
x = Lambda(lambda y: K.expand_dims(K.expand_dims(y, axis=1),axis=1),
lambda s: (s[0], 1, 1, s[-1]))(x)
if print_shapes: print('shape before mult',x.shape)
out = Multiply()([x,inp])
print('shape out',out.shape)
# out is (bs, r, c, 2048)
return out
def xception_aspp(input_shape = (shape_r, shape_c, 3),
conv_filters=256,
lstm_filters=512,
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
freeze_enc=False,
return_sequences=False):
'''Unfinished sketch: an Xception encoder followed by an ASPP-style block of
parallel dilated convolutions on the encoder output x, e.g.:
dil_conv1 = Conv2D(conv_filters, 3, padding='same', activation='relu', dilation_rate=(2, 2))(x)
dil_conv2 = Conv2D(conv_filters, 3, padding='same', activation='relu', dilation_rate=(4, 4))(x)
dil_conv3 = Conv2D(conv_filters, 3, padding='same', activation='relu', dilation_rate=(8, 8))(x)
'''
raise NotImplementedError('xception_aspp is a stub; see RecallNet_xception_aspp for a working ASPP model')
############# UMSI MODELS ###############
def UMSI(input_shape = (shape_r, shape_c, 3),
conv_filters=256,
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
freeze_enc=False,
return_sequences=False):
inp = Input(shape = input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
#ASPP
c0 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csep0")(xception.output)
c6 = DepthwiseConv2D((3,3),dilation_rate=(6,6),padding="same",use_bias=False,name="aspp_csepd6_depthwise")(xception.output)
c12 = DepthwiseConv2D((3,3),dilation_rate=(12,12),padding="same",use_bias=False,name="aspp_csepd12_depthwise")(xception.output)
c18 = DepthwiseConv2D((3,3),dilation_rate=(18,18),padding="same",use_bias=False,name="aspp_csepd18_depthwise")(xception.output)
c6 = BatchNormalization(name="aspp_csepd6_depthwise_BN")(c6)
c12 = BatchNormalization(name="aspp_csepd12_depthwise_BN")(c12)
c18 = BatchNormalization(name="aspp_csepd18_depthwise_BN")(c18)
c6 = Activation("relu", name = "activation_2")(c6)
c12 = Activation("relu", name = "activation_4")(c12)
c18 = Activation("relu", name = "activation_6")(c18)
c6 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd6_pointwise")(c6)
c12 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd12_pointwise")(c12)
c18 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd18_pointwise")(c18)
c0 = BatchNormalization(name='aspp0_BN')(c0)
c6 = BatchNormalization(name='aspp_csepd6_pointwise_BN')(c6)
c12 = BatchNormalization(name='aspp_csepd12_pointwise_BN')(c12)
c18 = BatchNormalization(name='aspp_csepd18_pointwise_BN')(c18)
c0 = Activation("relu", name = "aspp0_activation")(c0)
c6 = Activation("relu", name = "activation_3")(c6)
c12 = Activation("relu", name = "activation_5")(c12)
c18 = Activation("relu", name = "activation_7")(c18)
concat1 = Concatenate(name="concatenate_1")([c0,c6,c12,c18])
### classification module ###
x = Conv2D(256, (3,3), strides = (3,3), padding="same",use_bias=False,name = "global_conv")(xception.output)
x = BatchNormalization(name="global_BN")(x)
x = Activation("relu", name = "activation_1")(x)
x = Dropout(.3, name="dropout_1")(x)
x = GlobalAveragePooling2D(name = "global_average_pooling2d_1")(x)
x = Dense(256, name="global_dense")(x)
classif = Dropout(.3, name="dropout_2")(x)
out_classif = Dense(6, activation="softmax", name="out_classif")(classif)
x = Dense(256, name="dense_fusion")(classif)
def lambda_layer_function(x):
# tile the (bs, 256) fusion vector into a (bs, 30, 40, 256) feature map so it
# can be concatenated with the ASPP features along the channel axis
x = tf.reshape(x,(tf.shape(x)[0],1,1,256))
con = [x for i in range(30)]
con = tf.concat(con,axis=1)
con = tf.concat([con for i in range(40)],axis=2)
return con
x = Lambda(lambda_layer_function, name = "lambda_1")(x)
concat2 = Concatenate(name="concatenate_2")([concat1, x])
### DECODER ###
x = Conv2D(256,(1,1),padding="same",use_bias=False,name = "concat_projection")(concat2)
x = BatchNormalization(name="concat_projection_BN")(x)
x = Activation("relu", name="activation_8")(x)
x = Dropout(.3, name="dropout_3")(x)
x = Conv2D(256,(3,3),padding="same",use_bias=False,name = "dec_c1")(x)
x = Conv2D(256,(3,3),padding="same",use_bias=False,name = "dec_c2")(x)
x = Dropout(.3, name="dec_dp1")(x)
x = UpSampling2D(size=(2,2), interpolation='bilinear', name="dec_ups1")(x)
x = Conv2D(128,(3,3),padding="same",use_bias=False,name = "dec_c3")(x)
x = Conv2D(128,(3,3),padding="same",use_bias=False,name = "dec_c4")(x)
x = Dropout(.3, name="dec_dp2")(x)
x = UpSampling2D(size=(2,2), interpolation='bilinear', name="dec_ups2")(x)
x = Conv2D(64,(3,3),padding="same",use_bias=False,name = "dec_c5")(x)
x = Dropout(.3, name="dec_dp3")(x)
x = UpSampling2D(size=(4,4), interpolation='bilinear', name="dec_ups3")(x)
out_heatmap = Conv2D(1,(1,1),padding="same",use_bias=False,name = "dec_c_cout")(x)
# Building model
outs_final = [out_heatmap, out_classif]
print(out_heatmap.shape)
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
############# MODELS FOR RecallNet ###############
def RecallNet_UMSI(input_shape = (shape_r, shape_c, 3),
conv_filters=256,
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
freeze_enc=False,
return_sequences=False):
inp = Input(shape = input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
#ASPP
c0 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csep0")(xception.output)
c6 = DepthwiseConv2D((3,3),dilation_rate=(6,6),padding="same",use_bias=False,name="aspp_csepd6_depthwise")(xception.output)
c12 = DepthwiseConv2D((3,3),dilation_rate=(12,12),padding="same",use_bias=False,name="aspp_csepd12_depthwise")(xception.output)
c18 = DepthwiseConv2D((3,3),dilation_rate=(18,18),padding="same",use_bias=False,name="aspp_csepd18_depthwise")(xception.output)
c6 = BatchNormalization(name="aspp_csepd6_depthwise_BN")(c6)
c12 = BatchNormalization(name="aspp_csepd12_depthwise_BN")(c12)
c18 = BatchNormalization(name="aspp_csepd18_depthwise_BN")(c18)
c6 = Activation("relu", name = "activation_2")(c6)
c12 = Activation("relu", name = "activation_4")(c12)
c18 = Activation("relu", name = "activation_6")(c18)
c6 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd6_pointwise")(c6)
c12 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd12_pointwise")(c12)
c18 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd18_pointwise")(c18)
c0 = BatchNormalization(name='aspp0_BN')(c0)
c6 = BatchNormalization(name='aspp_csepd6_pointwise_BN')(c6)
c12 = BatchNormalization(name='aspp_csepd12_pointwise_BN')(c12)
c18 = BatchNormalization(name='aspp_csepd18_pointwise_BN')(c18)
c0 = Activation("relu", name = "aspp0_activation")(c0)
c6 = Activation("relu", name = "activation_3")(c6)
c12 = Activation("relu", name = "activation_5")(c12)
c18 = Activation("relu", name = "activation_7")(c18)
concat1 = Concatenate(name="concatenate_1")([c0,c6,c12,c18])
### classification module ###
x = Conv2D(256, (3,3), strides = (3,3), padding="same",use_bias=False,name = "global_conv")(xception.output)
x = BatchNormalization(name="global_BN")(x)
x = Activation("relu", name = "activation_1")(x)
x = Dropout(.3, name="dropout_1")(x)
x = GlobalAveragePooling2D(name = "global_average_pooling2d_1")(x)
x = Dense(256, name="global_dense")(x)
classif = Dropout(.3, name="dropout_2")(x)
out_classif = Dense(6, activation="softmax", name="out_classif")(classif)
x = Dense(256, name="dense_fusion")(classif)
def lambda_layer_function(x):
# same (bs, 256) -> (bs, 30, 40, 256) tiling as in UMSI above
x = tf.reshape(x,(tf.shape(x)[0],1,1,256))
con = [x for i in range(30)]
con = tf.concat(con,axis=1)
con = tf.concat([con for i in range(40)],axis=2)
return con
x = Lambda(lambda_layer_function, name = "lambda_1")(x)
concat2 = Concatenate(name="concatenate_2")([concat1, x])
### DECODER ###
flatten = GlobalAveragePooling2D(name = "global_average_pooling2d_2")(concat2)
mean_acc = Dense(256, name="mean_dense")(flatten)
mean_acc = Dense(1, name='out_mean_acc')(mean_acc)
type0_acc = Dense(256, name="type0_dense")(flatten)
type0_acc = Dense(1, name='out_type0_acc')(type0_acc)
# Building model
outs_final = [type0_acc, mean_acc, out_classif]
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
#Model for RecallNet
def RecallNet_xception_aspp(input_shape = (shape_r, shape_c, 3),
conv_filters=256,
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
freeze_enc=False,
return_sequences=False):
inp = Input(shape = input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
#ASPP
c0 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csep0")(xception.output)
c6 = DepthwiseConv2D((3,3),dilation_rate=(6,6),padding="same",use_bias=False,name="aspp_csepd6_depthwise")(xception.output)
c12 = DepthwiseConv2D((3,3),dilation_rate=(12,12),padding="same",use_bias=False,name="aspp_csepd12_depthwise")(xception.output)
c18 = DepthwiseConv2D((3,3),dilation_rate=(18,18),padding="same",use_bias=False,name="aspp_csepd18_depthwise")(xception.output)
c6 = BatchNormalization(name="aspp_csepd6_depthwise_BN")(c6)
c12 = BatchNormalization(name="aspp_csepd12_depthwise_BN")(c12)
c18 = BatchNormalization(name="aspp_csepd18_depthwise_BN")(c18)
c6 = Activation("relu", name = "activation_2")(c6)
c12 = Activation("relu", name = "activation_4")(c12)
c18 = Activation("relu", name = "activation_6")(c18)
c6 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd6_pointwise")(c6)
c12 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd12_pointwise")(c12)
c18 = Conv2D(256,(1,1),padding="same",use_bias=False,name = "aspp_csepd18_pointwise")(c18)
c0 = BatchNormalization(name='aspp0_BN')(c0)
c6 = BatchNormalization(name='aspp_csepd6_pointwise_BN')(c6)
c12 = BatchNormalization(name='aspp_csepd12_pointwise_BN')(c12)
c18 = BatchNormalization(name='aspp_csepd18_pointwise_BN')(c18)
c0 = Activation("relu", name = "aspp0_activation")(c0)
c6 = Activation("relu", name = "activation_3")(c6)
c12 = Activation("relu", name = "activation_5")(c12)
c18 = Activation("relu", name = "activation_7")(c18)
concat1 = Concatenate(name="concatenate_1")([c0,c6,c12,c18])
### classification module ###
x = Conv2D(256, (3,3), strides = (3,3), padding="same",use_bias=False,name = "global_conv")(xception.output)
x = BatchNormalization(name="global_BN")(x)
x = Activation("relu", name = "activation_1")(x)
x = Dropout(.3, name="dropout_1")(x)
x = GlobalAveragePooling2D(name = "global_average_pooling2d_1")(x)
x = Dense(256, name="global_dense")(x)
classif = Dropout(.3, name="dropout_2")(x)
#out_classif = Dense(6, activation="softmax", name="out_classif")(classif)
x = Dense(256, name="dense_fusion")(classif)
def lambda_layer_function(x):
# same (bs, 256) -> (bs, 30, 40, 256) tiling as in UMSI above
x = tf.reshape(x,(tf.shape(x)[0],1,1,256))
con = [x for i in range(30)]
con = tf.concat(con,axis=1)
con = tf.concat([con for i in range(40)],axis=2)
return con
x = Lambda(lambda_layer_function, name = "lambda_1")(x)
concat2 = Concatenate(name="concatenate_2")([concat1, x])
### DECODER ###
flatten = GlobalAveragePooling2D(name = "global_average_pooling2d_2")(concat2)
mean_acc = Dense(256, name="mean_dense")(flatten)
mean_acc = Dense(1, name='out_mean_acc')(mean_acc)
type0_acc = Dense(256, name="type0_dense")(flatten)
type0_acc = Dense(1, name='out_type0_acc')(type0_acc)
# Building model
outs_final = [mean_acc, type0_acc]
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
#Model for VMQA
def RecallNet_xception(input_shape = (shape_r, shape_c, 3),
conv_filters=256,
verbose=True,
print_shapes=True,
n_outs=1,
ups=8,
freeze_enc=False,
return_sequences=False):
inp = Input(shape = input_shape)
### ENCODER ###
xception = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp, pooling=None)
if print_shapes: print('xception output shapes:',xception.output.shape)
if freeze_enc:
for layer in xception.layers:
layer.trainable = False
x = Conv2D(256, (3,3), strides = (3,3), padding="same",use_bias=False,name = "global_conv")(xception.output)
x = BatchNormalization(name="global_BN")(x)
x = Activation("relu", name = "activation_1")(x)
x = Dropout(.3, name="dropout_1")(x)
x = GlobalAveragePooling2D(name = "global_average_pooling2d_1")(x)
x = Dense(256, name="global_dense")(x)
classif = Dropout(.3, name="dropout_2")(x)
# out_classif = Dense(6, activation="softmax", name="out_classif")(classif)
mean_acc = Dense(1, name='out_mean_acc')(classif)
type_0 = Dense(1, name='out_type0_acc')(classif)
# Building model
outs_final = [ mean_acc, type_0]
m = Model(inp, outs_final)
if verbose:
m.summary()
return m
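# Minimal sketch (assumes shape_r/shape_c come from sal_imp_utilities and uses
# plain keras losses; the actual training scripts may wire this up differently):
#
#   m = RecallNet_xception(verbose=False)
#   m.compile(optimizer='adam', loss=['mse', 'mse'])  # one loss per output head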

67
RecallNet/src/util.py Normal file
View file

@ -0,0 +1,67 @@
from singleduration_models import sam_resnet_new, UMSI, RecallNet_UMSI, RecallNet_xception, RecallNet_xception_aspp
from losses_keras2 import loss_wrapper, kl_time, cc_time, nss_time, cc_match, kl_cc_combined
MODELS = {
'sam-resnet': (sam_resnet_new, 'simple'),
"UMSI": (UMSI, "simple"),
'RecallNet_xception':(RecallNet_xception,'simple'),
'RecallNet_xception_aspp':(RecallNet_xception_aspp,'simple'),
'RecallNet_UMSI':(RecallNet_UMSI,'simple')
}
LOSSES = {
'kl': (kl_time, 'heatmap'),
'cc': (cc_time, 'heatmap'),
'nss': (nss_time, 'fixmap'),
'ccmatch': (cc_match, 'heatmap'),
"kl+cc": (kl_cc_combined, "heatmap")
}
def get_model_by_name(name):
""" Returns a model and a string indicating its mode of use."""
if name not in MODELS:
allowed_models = list(MODELS.keys())
raise RuntimeError("Model %s is not recognized. Please choose one of: %s" % (name, ",".join(allowed_models)))
else:
return MODELS[name]
def get_loss_by_name(name, out_size):
"""Gets the loss associated with a certain name.
If there is no custom loss associated with name `name`, returns the string
`name` so that keras can interpret it as a keras loss.
"""
if name not in LOSSES:
print("WARNING: found no custom loss with name %s, defaulting to a string." % name)
return name, 'heatmap'
else:
loss, out_type = LOSSES[name]
loss = loss_wrapper(loss, out_size)
return loss, out_type
def create_losses(loss_dict, out_size):
"""Given a dictionary that maps loss names to weights, returns loss functions and weights in the correct order.
By convention, losses that take in a heatmap (as opposed to a fixmap) come first in the array of losses. This function enforces that convention.
This function looks up the correct loss function by name and outputs the correct functions, ordering, and weights to pass to the model/generator.
"""
l_hm = []
l_hm_w = []
l_fm = []
l_fm_w = []
lstr = ""
for lname, wt in loss_dict.items():
loss, out_type = get_loss_by_name(lname, out_size)
if out_type == 'heatmap':
l_hm.append(loss)
l_hm_w.append(wt)
else:
l_fm.append(loss)
l_fm_w.append(wt)
lstr += lname + str(wt)
l = l_hm + l_fm
lw = l_hm_w + l_fm_w
n_heatmaps = len(l_hm)
return l, lw, lstr, n_heatmaps
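# Usage sketch (hypothetical loss weights): heatmap losses are ordered before
# fixmap losses, and n_heatmaps tells the caller where the split is:
#
#   losses, weights, lstr, n_heatmaps = create_losses(
#       {'kl': 10, 'cc': -3, 'nss': -1}, out_size=(shape_r_out, shape_c_out))
#   model.compile(optimizer='adam', loss=losses, loss_weights=weights)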

325
RecallNet/src/xception_custom.py Normal file
View file

@ -0,0 +1,325 @@
"""Xception V1 model for Keras.
On ImageNet, this model gets to a top-1 validation accuracy of 0.790
and a top-5 validation accuracy of 0.945.
Do note that the input image format for this model is different than for
the VGG16 and ResNet models (299x299 instead of 224x224),
and that the input preprocessing function
is also different (same as Inception V3).
# Reference
- [Xception: Deep Learning with Depthwise Separable Convolutions](
https://arxiv.org/abs/1610.02357)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import warnings
from keras_applications import get_submodules_from_kwargs
from keras.applications import imagenet_utils
from keras.applications.imagenet_utils import decode_predictions
from keras_applications.imagenet_utils import _obtain_input_shape
from keras.applications import keras_modules_injection
TF_WEIGHTS_PATH = (
'https://github.com/fchollet/deep-learning-models/'
'releases/download/v0.4/'
'xception_weights_tf_dim_ordering_tf_kernels.h5')
TF_WEIGHTS_PATH_NO_TOP = (
'https://github.com/fchollet/deep-learning-models/'
'releases/download/v0.4/'
'xception_weights_tf_dim_ordering_tf_kernels_notop.h5')
@keras_modules_injection
def Xception_wrapper(*args, **kwargs):
return Xception(*args, **kwargs)
def Xception(include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000,
**kwargs):
"""Instantiates the Xception architecture.
Optionally loads weights pre-trained on ImageNet. This model can
only be used with the data format `(width, height, channels)`.
You should set `image_data_format='channels_last'` in your Keras config
located at ~/.keras/keras.json.
Note that the default input image size for this model is 299x299.
# Arguments
include_top: whether to include the fully-connected
layer at the top of the network.
weights: one of `None` (random initialization),
'imagenet' (pre-training on ImageNet),
or the path to the weights file to be loaded.
input_tensor: optional Keras tensor
(i.e. output of `layers.Input()`)
to use as image input for the model.
input_shape: optional shape tuple, only to be specified
if `include_top` is False (otherwise the input shape
has to be `(299, 299, 3)`).
It should have exactly 3 input channels,
and width and height should be no smaller than 71.
E.g. `(150, 150, 3)` would be one valid value.
pooling: Optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be
the 4D tensor output of the
last convolutional block.
- `avg` means that global average pooling
will be applied to the output of the
last convolutional block, and thus
the output of the model will be a 2D tensor.
- `max` means that global max pooling will
be applied.
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True,
and if no `weights` argument is specified.
# Returns
A Keras model instance.
# Raises
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
RuntimeError: If attempting to run this model with a
backend that does not support separable convolutions.
"""
backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
if not (weights in {'imagenet', None} or os.path.exists(weights)):
raise ValueError('The `weights` argument should be either '
'`None` (random initialization), `imagenet` '
'(pre-training on ImageNet), '
'or the path to the weights file to be loaded.')
if weights == 'imagenet' and include_top and classes != 1000:
raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
' as true, `classes` should be 1000')
if backend.image_data_format() != 'channels_last':
warnings.warn('The Xception model is only available for the '
'input data format "channels_last" '
'(width, height, channels). '
'However your settings specify the default '
'data format "channels_first" '
'(channels, width, height). '
'You should set `image_data_format="channels_last"` '
'in your Keras '
'config located at ~/.keras/keras.json. '
'The model being returned right now will expect inputs '
'to follow the "channels_last" data format.')
backend.set_image_data_format('channels_last')
old_data_format = 'channels_first'
else:
old_data_format = None
# Determine proper input shape
input_shape = _obtain_input_shape(input_shape,
default_size=299,
min_size=71,
data_format=backend.image_data_format(),
require_flatten=include_top,
weights=weights)
if input_tensor is None:
img_input = layers.Input(shape=input_shape)
else:
if not backend.is_keras_tensor(input_tensor):
img_input = layers.Input(tensor=input_tensor, shape=input_shape)
else:
img_input = input_tensor
x = layers.Conv2D(32, (3, 3),
strides=(2, 2),
use_bias=False,
name='block1_conv1')(img_input)
x = layers.BatchNormalization(name='block1_conv1_bn')(x)
x = layers.Activation('relu', name='block1_conv1_act')(x)
x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
x = layers.BatchNormalization(name='block1_conv2_bn')(x)
x = layers.Activation('relu', name='block1_conv2_act')(x)
residual = layers.Conv2D(128, (1, 1),
strides=(2, 2),
padding='same',
use_bias=False)(x)
residual = layers.BatchNormalization()(residual)
x = layers.SeparableConv2D(128, (3, 3),
padding='same',
use_bias=False,
name='block2_sepconv1')(x)
x = layers.BatchNormalization(name='block2_sepconv1_bn')(x)
x = layers.Activation('relu', name='block2_sepconv2_act')(x)
x = layers.SeparableConv2D(128, (3, 3),
padding='same',
use_bias=False,
name='block2_sepconv2')(x)
x = layers.BatchNormalization(name='block2_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3),
strides=(2, 2),
padding='same',
name='block2_pool')(x)
x = layers.add([x, residual])
residual = layers.Conv2D(256, (1, 1), strides=(2, 2),
padding='same', use_bias=False)(x)
residual = layers.BatchNormalization()(residual)
x = layers.Activation('relu', name='block3_sepconv1_act')(x)
x = layers.SeparableConv2D(256, (3, 3),
padding='same',
use_bias=False,
name='block3_sepconv1')(x)
x = layers.BatchNormalization(name='block3_sepconv1_bn')(x)
x = layers.Activation('relu', name='block3_sepconv2_act')(x)
x = layers.SeparableConv2D(256, (3, 3),
padding='same',
use_bias=False,
name='block3_sepconv2')(x)
x = layers.BatchNormalization(name='block3_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3), strides=(2, 2),
padding='same',
name='block3_pool')(x)
x = layers.add([x, residual])
residual = layers.Conv2D(728, (1, 1),
strides=(1, 1),# ORIGINAL (2,2)
padding='same',
use_bias=False)(x)
residual = layers.BatchNormalization()(residual)
x = layers.Activation('relu', name='block4_sepconv1_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name='block4_sepconv1')(x)
x = layers.BatchNormalization(name='block4_sepconv1_bn')(x)
x = layers.Activation('relu', name='block4_sepconv2_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name='block4_sepconv2')(x)
x = layers.BatchNormalization(name='block4_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3), strides=(1, 1),# ORIGINAL (2,2)
padding='same',
name='block4_pool')(x)
x = layers.add([x, residual])
for i in range(8):
residual = x
prefix = 'block' + str(i + 5)
x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name=prefix + '_sepconv1')(x)
x = layers.BatchNormalization(name=prefix + '_sepconv1_bn')(x)
x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name=prefix + '_sepconv2')(x)
x = layers.BatchNormalization(name=prefix + '_sepconv2_bn')(x)
x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name=prefix + '_sepconv3')(x)
x = layers.BatchNormalization(name=prefix + '_sepconv3_bn')(x)
x = layers.add([x, residual])
residual = layers.Conv2D(1024, (1, 1), strides=(1, 1),# ORIGINAL (2,2)
padding='same', use_bias=False)(x)
residual = layers.BatchNormalization()(residual)
x = layers.Activation('relu', name='block13_sepconv1_act')(x)
x = layers.SeparableConv2D(728, (3, 3),
padding='same',
use_bias=False,
name='block13_sepconv1')(x)
x = layers.BatchNormalization(name='block13_sepconv1_bn')(x)
x = layers.Activation('relu', name='block13_sepconv2_act')(x)
x = layers.SeparableConv2D(1024, (3, 3),
padding='same',
use_bias=False,
name='block13_sepconv2')(x)
x = layers.BatchNormalization(name='block13_sepconv2_bn')(x)
x = layers.MaxPooling2D((3, 3),
strides=(1, 1), # ORIGINAL (2,2)
padding='same',
name='block13_pool')(x)
x = layers.add([x, residual])
x = layers.SeparableConv2D(1536, (3, 3),
padding='same',
use_bias=False,
name='block14_sepconv1')(x)
x = layers.BatchNormalization(name='block14_sepconv1_bn')(x)
x = layers.Activation('relu', name='block14_sepconv1_act')(x)
x = layers.SeparableConv2D(2048, (3, 3),
padding='same',
use_bias=False,
name='block14_sepconv2')(x)
x = layers.BatchNormalization(name='block14_sepconv2_bn')(x)
x = layers.Activation('relu', name='block14_sepconv2_act')(x)
if include_top:
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='softmax', name='predictions')(x)
else:
if pooling == 'avg':
x = layers.GlobalAveragePooling2D()(x)
elif pooling == 'max':
x = layers.GlobalMaxPooling2D()(x)
# Ensure that the model takes into account
# any potential predecessors of `input_tensor`.
if input_tensor is not None:
inputs = keras_utils.get_source_inputs(input_tensor)
else:
inputs = img_input
# Create model.
model = models.Model(inputs, x, name='xception')
# Load weights.
if weights == 'imagenet':
if include_top:
weights_path = keras_utils.get_file(
'xception_weights_tf_dim_ordering_tf_kernels.h5',
TF_WEIGHTS_PATH,
cache_subdir='models',
file_hash='0a58e3b7378bc2990ea3b43d5981f1f6')
else:
weights_path = keras_utils.get_file(
'xception_weights_tf_dim_ordering_tf_kernels_notop.h5',
TF_WEIGHTS_PATH_NO_TOP,
cache_subdir='models',
file_hash='b0042744bf5b25fce3cb969f33bebb97')
model.load_weights(weights_path)
if backend.backend() == 'theano':
keras_utils.convert_all_kernels_in_model(model)
elif weights is not None:
model.load_weights(weights)
if old_data_format:
backend.set_image_data_format(old_data_format)
return model
def preprocess_input(x, **kwargs):
"""Preprocesses a numpy array encoding a batch of images.
# Arguments
x: a 4D numpy array consists of RGB values within [0, 255].
# Returns
Preprocessed array.
"""
return imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
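# Usage sketch: build the encoder (the strides marked "ORIGINAL (2,2)" above
# were reduced to keep a larger spatial output) and preprocess a batch first:
#
#   from keras.layers import Input
#   inp = Input(shape=(240, 320, 3))  # hypothetical input size
#   enc = Xception_wrapper(include_top=False, weights='imagenet', input_tensor=inp)
#   feats = enc.predict(preprocess_input(x))  # x: RGB batch in [0, 255]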