diff --git a/README.md b/README.md
index 4800bd2..18e890d 100755
--- a/README.md
+++ b/README.md
@@ -11,16 +11,15 @@ Tensorflow implementation for reproducing main results in the paper [StackGAN: T
 
 ### Dependencies
 
-python 2.7
+python 3.6+
 
-[TensorFlow 0.12](https://www.tensorflow.org/get_started/os_setup)
+[TensorFlow 1.13+](https://www.tensorflow.org/get_started/os_setup)
 
 [Optional] [Torch](http://torch.ch/docs/getting-started.html#_) is needed, if using the pre-trained char-CNN-RNN text encoder.
 
 [Optional] [skip-thought](https://github.com/ryankiros/skip-thoughts) is needed, if using the skip-thought text encoder.
 
 In addition, please add the project folder to PYTHONPATH and `pip install` the following packages:
-- `prettytensor`
 - `progressbar`
 - `python-dateutil`
 - `easydict`
@@ -32,7 +31,12 @@ In addition, please add the project folder to PYTHONPATH and `pip install` the f
 
 **Data**
 
 1. Download our preprocessed char-CNN-RNN text embeddings for [birds](https://drive.google.com/open?id=0B3y_msrWZaXLT1BZdVdycDY5TEE) and [flowers](https://drive.google.com/open?id=0B3y_msrWZaXLaUc0UXpmcnhaVmM) and save them to `Data/`.
+   - [Optional] Follow the instructions in [reedscot/icml2016](https://github.com/reedscot/icml2016) to download the pretrained char-CNN-RNN text encoders and extract text embeddings.
+
+   - [Optional] Download our preprocessed skip-thoughts text embeddings for [birds](https://drive.google.com/open?id=10jlSsU3g2ywDFXgUmn2Dh_UJCkQectzy) and save them to `Data/`.
+
+
 2. Download the [birds](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html) and [flowers](http://www.robots.ox.ac.uk/~vgg/data/flowers/102/) image data. Extract them to `Data/birds/` and `Data/flowers/`, respectively.
 3. Preprocess images.
   - For birds: `python misc/preprocess_birds.py`
@@ -51,9 +55,9 @@ In addition, please add the project folder to PYTHONPATH and `pip install` the f
 
 **Pretrained Model**
 
-- [StackGAN for birds](https://drive.google.com/open?id=0B3y_msrWZaXLNUNKa3BaRjAyTzQ) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
-- [StackGAN for flowers](https://drive.google.com/open?id=0B3y_msrWZaXLX01FMC1JQW9vaFk) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
-- [StackGAN for birds](https://drive.google.com/open?id=0B3y_msrWZaXLZVNRNFg4d055Q1E) trained from skip-thought text embeddings. Download and save it to `models/` (Just used the same setting as the char-CNN-RNN. We assume better results can be achieved by playing with the hyper-parameters).
+- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
+- [StackGAN for flowers]() trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
+- [StackGAN for birds]() trained from skip-thought text embeddings. Download and save it to `models/` (we just used the same settings as for char-CNN-RNN; better results can likely be achieved by tuning the hyper-parameters).
@@ -96,6 +100,12 @@ booktitle = {{ICCV}},
 
 - [StackGAN++: Realistic Image Synthesis with Stacked Generative Adversarial Networks](https://arxiv.org/abs/1710.10916)
 - [AttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks](https://arxiv.org/abs/1711.10485) [[supplementary]](https://1drv.ms/b/s!Aj4exx_cRA4ghK5-kUG-EqH7hgknUA) [[code]](https://github.com/taoxugit/AttnGAN)
 
+**Future**
+
+[Fashion Expansion](https://github.com/1o0ko/StackGAN-v1-TensorFlow)
+
+[Fashion Dataset](https://github.com/ayushidalmia/awesome-fashion-ai#datasets)
+
 **References**
 
 - Generative Adversarial Text-to-Image Synthesis [Paper](https://arxiv.org/abs/1605.05396) [Code](https://github.com/reedscot/icml2016)
diff --git a/demo/birds_demo.sh b/demo/birds_demo.sh
old mode 100644
new mode 100755
index 66ff9ab..162cec8
--- a/demo/birds_demo.sh
+++ b/demo/birds_demo.sh
@@ -1,3 +1,4 @@
+#!/usr/bin/env bash
 #
 # Extract text embeddings from the encoder
 #
@@ -15,7 +16,7 @@ th demo/get_embedding.lua
 #
 # Generate image from text embeddings
 #
-python demo/demo.py \
+python3 demo/demo.py \
     --cfg demo/cfg/birds-demo.yml \
     --gpu ${GPU} \
     --caption_path ${CAPTION_PATH}.t7
diff --git a/demo/birds_skip_thought_demo.py b/demo/birds_skip_thought_demo.py
index cddb21f..f25303a 100644
--- a/demo/birds_skip_thought_demo.py
+++ b/demo/birds_skip_thought_demo.py
@@ -1,30 +1,29 @@
 from __future__ import division
 from __future__ import print_function
 
-import prettytensor as pt
 import tensorflow as tf
 import numpy as np
-import scipy.misc
+import imageio
 import os
 import argparse
 from PIL import Image, ImageDraw, ImageFont
 
-from misc.config import cfg, cfg_from_file
-from misc.utils import mkdir_p
-from misc import skipthoughts
-from stageII.model import CondGAN
+import sys
+sys.path.append('misc')
+sys.path.append('stageII')
+
+import skipthoughts
+from config import cfg, cfg_from_file
+from utils import mkdir_p
+from model import CondGAN
+from skimage.transform import resize
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Train a GAN network')
-    parser.add_argument('--cfg', dest='cfg_file',
-                        help='optional config file',
-                        default=None, type=str)
-    parser.add_argument('--gpu', dest='gpu_id',
-                        help='GPU device id to use [0]',
-                        default=-1, type=int)
-    parser.add_argument('--caption_path', type=str, default=None,
-                        help='Path to the file with text sentences')
+    parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str)
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int)
+    parser.add_argument('--caption_path', type=str, default=None, help='Path to the file with text sentences')
     # if len(sys.argv) == 1:
     #     parser.print_help()
     #     sys.exit(1)
@@ -49,21 +48,17 @@ def sample_encoded_context(embeddings, model, bAugmentation=True):
 
 
 def build_model(sess, embedding_dim, batch_size):
-    model = CondGAN(
-        lr_imsize=cfg.TEST.LR_IMSIZE,
-        hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE))
-
-    embeddings = tf.placeholder(
-        tf.float32, [batch_size, embedding_dim],
-        name='conditional_embeddings')
-    with pt.defaults_scope(phase=pt.Phase.test):
-        with tf.variable_scope("g_net"):
-            c = sample_encoded_context(embeddings, model)
-            z = tf.random_normal([batch_size, cfg.Z_DIM])
-            fake_images = model.get_generator(tf.concat(1, [c, z]))
-        with tf.variable_scope("hr_g_net"):
-            hr_c = sample_encoded_context(embeddings, model)
-            hr_fake_images = model.hr_get_generator(fake_images, hr_c)
+    model = CondGAN(lr_imsize=cfg.TEST.LR_IMSIZE, hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE))
+
+    embeddings = tf.placeholder(tf.float32, [batch_size, embedding_dim], name='conditional_embeddings')
+
+    with tf.variable_scope("g_net"):
+        c = sample_encoded_context(embeddings, model)
+        z = tf.random_normal([batch_size, cfg.Z_DIM])
+        fake_images = model.get_generator(tf.concat([c, z], 1), False)
+    with tf.variable_scope("hr_g_net"):
+        hr_c = sample_encoded_context(embeddings, model)
+        hr_fake_images = model.hr_get_generator(fake_images, hr_c, False)
 
     ckt_path = cfg.TEST.PRETRAINED_MODEL
     if ckt_path.find('.ckpt') != -1:
@@ -101,9 +96,7 @@ def drawCaption(img, caption):
     return img_txt
 
 
-def save_super_images(sample_batchs, hr_sample_batchs,
-                      captions_batch, batch_size,
-                      startID, save_dir):
+def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_size, startID, save_dir):
     if not os.path.isdir(save_dir):
         print('Make a new folder: ', save_dir)
         mkdir_p(save_dir)
@@ -119,7 +112,7 @@ def save_super_images(sample_batchs, hr_sample_batchs,
             lr_img = sample_batchs[i][j]
             hr_img = hr_sample_batchs[i][j]
             hr_img = (hr_img + 1.0) * 127.5
-            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
+            re_sample = resize(lr_img, hr_img.shape[:2])
             row1.append(re_sample)
             row2.append(hr_img)
         row1 = np.concatenate(row1, axis=1)
@@ -134,27 +127,23 @@ def save_super_images(sample_batchs, hr_sample_batchs,
             lr_img = sample_batchs[i][j]
             hr_img = hr_sample_batchs[i][j]
             hr_img = (hr_img + 1.0) * 127.5
-            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
+            re_sample = resize(lr_img, hr_img.shape[:2])
             row1.append(re_sample)
             row2.append(hr_img)
         row1 = np.concatenate(row1, axis=1)
         row2 = np.concatenate(row2, axis=1)
         super_row = np.concatenate([row1, row2], axis=0)
         superimage2 = np.zeros_like(superimage)
-        superimage2[:super_row.shape[0],
-                    :super_row.shape[1],
-                    :super_row.shape[2]] = super_row
+        superimage2[:super_row.shape[0], :super_row.shape[1], :super_row.shape[2]] = super_row
         mid_padding = np.zeros((64, superimage.shape[1], 3))
-        superimage =\
-            np.concatenate([superimage, mid_padding, superimage2], axis=0)
+        superimage = np.concatenate([superimage, mid_padding, superimage2], axis=0)
 
         top_padding = np.zeros((128, superimage.shape[1], 3))
-        superimage =\
-            np.concatenate([top_padding, superimage], axis=0)
+        superimage = np.concatenate([top_padding, superimage], axis=0)
 
         fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
         superimage = drawCaption(np.uint8(superimage), captions_batch[j])
-        scipy.misc.imsave(fullpath, superimage)
+        imageio.imsave(fullpath, superimage)
 
 
 if __name__ == "__main__":
@@ -188,8 +177,8 @@ def save_super_images(sample_batchs, hr_sample_batchs,
     config = tf.ConfigProto(allow_soft_placement=True)
     with tf.Session(config=config) as sess:
         with tf.device("/gpu:%d" % cfg.GPU_ID):
-            embeddings_holder, fake_images_opt, hr_fake_images_opt =\
-                build_model(sess, embeddings.shape[-1], batch_size)
+            embeddings_holder, fake_images_opt, hr_fake_images_opt = build_model(sess, embeddings.shape[-1],
+                                                                                 batch_size)
 
             count = 0
             while count < num_embeddings:
@@ -205,19 +194,14 @@ def save_super_images(sample_batchs, hr_sample_batchs,
                 # Generate up to 16 images for each sentence with
                 # randomness from noise z and conditioning augmentation.
                 for i in range(np.minimum(16, cfg.TEST.NUM_COPY)):
-                    hr_samples, samples =\
-                        sess.run([hr_fake_images_opt, fake_images_opt],
-                                 {embeddings_holder: embeddings_batch})
+                    hr_samples, samples = sess.run([hr_fake_images_opt, fake_images_opt],
+                                                   {embeddings_holder: embeddings_batch})
                     samples_batchs.append(samples)
                     hr_samples_batchs.append(hr_samples)
-                save_super_images(samples_batchs,
-                                  hr_samples_batchs,
-                                  captions_batch,
-                                  batch_size,
-                                  count, save_dir)
+                save_super_images(samples_batchs, hr_samples_batchs, captions_batch, batch_size, count, save_dir)
                 count += batch_size
 
     print('Finish generating samples for %d sentences:' % num_embeddings)
     print('Example sentences:')
-    for i in xrange(np.minimum(10, num_embeddings)):
+    for i in range(np.minimum(10, num_embeddings)):
         print('Sentence %d: %s' % (i, captions_list[i]))
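The substantive API change threaded through both demo scripts is the `tf.concat` signature: pre-1.0 TensorFlow took the axis first (`tf.concat(1, [c, z])`), while TensorFlow 1.x takes the values first. The extra `False` passed to `get_generator`/`hr_get_generator` is the new `training` flag that the rewritten `CondGAN` forwards to batch normalization (see `stageI/model.py` below). A minimal sketch of the call-site change, with shapes chosen purely for illustration:

```python
import tensorflow as tf

c = tf.placeholder(tf.float32, [64, 128])  # conditioning vector from the embedding
z = tf.random_normal([64, 100])            # noise, as in build_model above

# Pre-1.0:  g_input = tf.concat(1, [c, z])
g_input = tf.concat([c, z], axis=1)        # TF 1.x: values first, axis second -> shape [64, 228]
```

One remaining asymmetry worth noting: `demo.py` (further below) rescales `lr_img` from [-1, 1] to [0, 255] before resizing it into the super-image, while `save_super_images` in this file leaves `lr_img` in [-1, 1], so the skip-thought demo's low-resolution rows will render almost black; the same `(lr_img + 1.0) * 127.5` line is presumably wanted here as well.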
diff --git a/demo/cfg/birds-demo.yml b/demo/cfg/birds-demo.yml
index 8526652..96b5286 100644
--- a/demo/cfg/birds-demo.yml
+++ b/demo/cfg/birds-demo.yml
@@ -5,7 +5,7 @@ GPU_ID: 0
 Z_DIM: 100
 
 TEST:
-    PRETRAINED_MODEL: './models/birds_model_164000.ckpt'
+    PRETRAINED_MODEL: './models/stageII/model_330000.ckpt'
     BATCH_SIZE: 64
     NUM_COPY: 8
diff --git a/demo/cfg/birds-eval.yml b/demo/cfg/birds-eval.yml
index 78ba936..1a44393 100644
--- a/demo/cfg/birds-eval.yml
+++ b/demo/cfg/birds-eval.yml
@@ -7,7 +7,7 @@ Z_DIM: 100
 
 TRAIN:
     FLAG: False
-    PRETRAINED_MODEL: './models/birds_model_164000.ckpt'
+    PRETRAINED_MODEL: './models/stageII/model_330000.ckpt'
     BATCH_SIZE: 64
     NUM_COPY: 8
diff --git a/demo/cfg/birds-skip-thought-demo.yml b/demo/cfg/birds-skip-thought-demo.yml
index e346428..1c00129 100644
--- a/demo/cfg/birds-skip-thought-demo.yml
+++ b/demo/cfg/birds-skip-thought-demo.yml
@@ -6,7 +6,7 @@ Z_DIM: 100
 
 TEST:
     CAPTION_PATH: './Data/birds/example_captions.txt'
-    PRETRAINED_MODEL: './models/birds_skip_thought_model_164000.ckpt'
+    PRETRAINED_MODEL: './models/stageII/model_330000.ckpt'
     BATCH_SIZE: 64
     NUM_COPY: 8
diff --git a/demo/demo.py b/demo/demo.py
index 6f21a72..a4bc9d3 100644
--- a/demo/demo.py
+++ b/demo/demo.py
@@ -1,31 +1,30 @@
 from __future__ import division
 from __future__ import print_function
 
-import prettytensor as pt
 import tensorflow as tf
 import numpy as np
-import scipy.misc
+import imageio
 import os
 import argparse
 import torchfile
 from PIL import Image, ImageDraw, ImageFont
 import re
 
-from misc.config import cfg, cfg_from_file
-from misc.utils import mkdir_p
-from stageII.model import CondGAN
+import sys
+sys.path.append('misc')
+sys.path.append('stageII')
+
+from config import cfg, cfg_from_file
+from utils import mkdir_p, caption_convert
+from model import CondGAN
+from skimage.transform import resize
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Train a GAN network')
-    parser.add_argument('--cfg', dest='cfg_file',
-                        help='optional config file',
-                        default=None, type=str)
-    parser.add_argument('--gpu', dest='gpu_id',
-                        help='GPU device id to use [0]',
-                        default=-1, type=int)
-    parser.add_argument('--caption_path', type=str, default=None,
-                        help='Path to the file with text sentences')
+    parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str)
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int)
+    parser.add_argument('--caption_path', type=str, default=None, help='Path to the file with text sentences')
     # if len(sys.argv) == 1:
    #     parser.print_help()
     #     sys.exit(1)
@@ -50,26 +49,22 @@ def build_model(sess, embedding_dim, batch_size):
-    model = CondGAN(
-        lr_imsize=cfg.TEST.LR_IMSIZE,
-        hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE))
-
-    embeddings = tf.placeholder(
-        tf.float32, [batch_size, embedding_dim],
-        name='conditional_embeddings')
-    with pt.defaults_scope(phase=pt.Phase.test):
-        with tf.variable_scope("g_net"):
-            c = sample_encoded_context(embeddings, model)
-            z = tf.random_normal([batch_size, cfg.Z_DIM])
-            fake_images = model.get_generator(tf.concat(1, [c, z]))
-        with tf.variable_scope("hr_g_net"):
-            hr_c = sample_encoded_context(embeddings, model)
-            hr_fake_images = model.hr_get_generator(fake_images, hr_c)
+    model = CondGAN(lr_imsize=cfg.TEST.LR_IMSIZE, hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE))
+
+    embeddings = tf.placeholder(tf.float32, [batch_size, embedding_dim], name='conditional_embeddings')
+
+    with tf.variable_scope("g_net"):
+        c = sample_encoded_context(embeddings, model)
+        z = tf.random_normal([batch_size, cfg.Z_DIM])
+        fake_images = model.get_generator(tf.concat([c, z], 1), False)
+    with tf.variable_scope("hr_g_net"):
+        hr_c = sample_encoded_context(embeddings, model)
+        hr_fake_images = model.hr_get_generator(fake_images, hr_c, False)
 
     ckt_path = cfg.TEST.PRETRAINED_MODEL
     if ckt_path.find('.ckpt') != -1:
         print("Reading model parameters from %s" % ckt_path)
-        saver = tf.train.Saver(tf.all_variables())
+        saver = tf.train.Saver(tf.global_variables())
         saver.restore(sess, ckt_path)
     else:
         print("Input a valid model path.")
@@ -77,6 +72,7 @@ def build_model(sess, embedding_dim, batch_size):
 
 def drawCaption(img, caption):
+    caption = caption_convert(caption)
     img_txt = Image.fromarray(img)
     # get a font
     fnt = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', 50)
@@ -102,9 +98,7 @@ def drawCaption(img, caption):
     return img_txt
 
 
-def save_super_images(sample_batchs, hr_sample_batchs,
-                      captions_batch, batch_size,
-                      startID, save_dir):
+def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_size, startID, save_dir):
     if not os.path.isdir(save_dir):
         print('Make a new folder: ', save_dir)
         mkdir_p(save_dir)
@@ -112,7 +106,7 @@ def save_super_images(sample_batchs, hr_sample_batchs,
     # Save up to 16 samples for each text embedding/sentence
     img_shape = hr_sample_batchs[0][0].shape
     for j in range(batch_size):
-        if not re.search('[a-zA-Z]+', captions_batch[j]):
+        if not re.search(b'[a-zA-Z]+', captions_batch[j]):
             continue
 
         padding = np.zeros(img_shape)
@@ -121,9 +115,10 @@ def save_super_images(sample_batchs, hr_sample_batchs,
         # First row with up to 8 samples
         for i in range(np.minimum(8, len(sample_batchs))):
             lr_img = sample_batchs[i][j]
+            lr_img = (lr_img + 1.0) * 127.5
             hr_img = hr_sample_batchs[i][j]
             hr_img = (hr_img + 1.0) * 127.5
-            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
+            re_sample = resize(lr_img, hr_img.shape[:2])
             row1.append(re_sample)
             row2.append(hr_img)
         row1 = np.concatenate(row1, axis=1)
@@ -136,29 +131,26 @@ def save_super_images(sample_batchs, hr_sample_batchs,
         row2 = [padding]
         for i in range(8, len(sample_batchs)):
             lr_img = sample_batchs[i][j]
+            lr_img = (lr_img + 1.0) * 127.5
             hr_img = hr_sample_batchs[i][j]
             hr_img = (hr_img + 1.0) * 127.5
-            re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
+            re_sample = resize(lr_img, hr_img.shape[:2])
             row1.append(re_sample)
             row2.append(hr_img)
         row1 = np.concatenate(row1, axis=1)
         row2 = np.concatenate(row2, axis=1)
         super_row = np.concatenate([row1, row2], axis=0)
         superimage2 = np.zeros_like(superimage)
-        superimage2[:super_row.shape[0],
-                    :super_row.shape[1],
-                    :super_row.shape[2]] = super_row
+        superimage2[:super_row.shape[0], :super_row.shape[1], :super_row.shape[2]] = super_row
         mid_padding = np.zeros((64, superimage.shape[1], 3))
-        superimage =\
-            np.concatenate([superimage, mid_padding, superimage2], axis=0)
+        superimage = np.concatenate([superimage, mid_padding, superimage2], axis=0)
 
         top_padding = np.zeros((128, superimage.shape[1], 3))
-        superimage =\
-            np.concatenate([top_padding, superimage], axis=0)
+        superimage = np.concatenate([top_padding, superimage], axis=0)
 
         fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j)
         superimage = drawCaption(np.uint8(superimage), captions_batch[j])
-        scipy.misc.imsave(fullpath, superimage)
+        imageio.imwrite(fullpath, superimage)
 
 
 if __name__ == "__main__":
@@ -188,8 +180,8 @@ def save_super_images(sample_batchs, hr_sample_batchs,
     config = tf.ConfigProto(allow_soft_placement=True)
     with tf.Session(config=config) as sess:
         with tf.device("/gpu:%d" % cfg.GPU_ID):
-            embeddings_holder, fake_images_opt, hr_fake_images_opt =\
-                build_model(sess, embeddings.shape[-1], batch_size)
+            embeddings_holder, fake_images_opt, hr_fake_images_opt = build_model(sess, embeddings.shape[-1],
+                                                                                 batch_size)
 
             count = 0
             while count < num_embeddings:
@@ -205,19 +197,14 @@ def save_super_images(sample_batchs, hr_sample_batchs,
                 # Generate up to 16 images for each sentence with
                 # randomness from noise z and conditioning augmentation.
                 for i in range(np.minimum(16, cfg.TEST.NUM_COPY)):
-                    hr_samples, samples =\
-                        sess.run([hr_fake_images_opt, fake_images_opt],
-                                 {embeddings_holder: embeddings_batch})
+                    hr_samples, samples = sess.run([hr_fake_images_opt, fake_images_opt],
+                                                   {embeddings_holder: embeddings_batch})
                     samples_batchs.append(samples)
                     hr_samples_batchs.append(hr_samples)
-                save_super_images(samples_batchs,
-                                  hr_samples_batchs,
-                                  captions_batch,
-                                  batch_size,
-                                  count, save_dir)
+                save_super_images(samples_batchs, hr_samples_batchs, captions_batch, batch_size, count, save_dir)
                 count += batch_size
 
     print('Finish generating samples for %d sentences:' % num_embeddings)
     print('Example sentences:')
-    for i in xrange(np.minimum(10, num_embeddings)):
-        print('Sentence %d: %s' % (i, captions_list[i]))
+    for i in range(np.minimum(10, num_embeddings)):
+        print('Sentence %d: %s' % (i, caption_convert(captions_list[i])))
diff --git a/demo/flowers_demo.sh b/demo/flowers_demo.sh
index 287699b..28d854b 100644
--- a/demo/flowers_demo.sh
+++ b/demo/flowers_demo.sh
@@ -1,3 +1,4 @@
+#!/usr/bin/env bash
 #
 # Extract text embeddings from the encoder
 #
@@ -16,7 +17,7 @@ th demo/get_embedding.lua
 #
 # Generate image from text embeddings
 #
-python demo/demo.py \
+python3 demo/demo.py \
     --cfg demo/cfg/flowers-demo.yml \
     --gpu ${GPU} \
     --caption_path ${CAPTION_PATH}.t7
diff --git a/misc/config.py b/misc/config.py
index 3ff777b..0653da3 100644
--- a/misc/config.py
+++ b/misc/config.py
@@ -1,7 +1,6 @@
 from __future__ import division
 from __future__ import print_function
 
-import os.path as osp
 import numpy as np
 from easydict import EasyDict as edict
 
@@ -48,6 +47,7 @@ __C.TRAIN.COEFF = edict()
 __C.TRAIN.COEFF.KL = 2.0
 
+# For Stage II training
 __C.TRAIN.FINETUNE_LR = False
 __C.TRAIN.FT_LR_RETIO = 0.1
 
@@ -66,9 +66,9 @@ def _merge_a_into_b(a, b):
     if type(a) is not edict:
         return
 
-    for k, v in a.iteritems():
+    for k, v in a.items():
         # a must specify keys that are in b
-        if not b.has_key(k):
+        if k not in b:
            raise KeyError('{} is not a valid config key'.format(k))
 
         # the types must match, too
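The `misc/config.py` hunk covers the two dictionary APIs that Python 3 removed outright: `dict.iteritems()` and `dict.has_key()`. A small self-contained illustration of the replacements (the `defaults`/`overrides` dicts are hypothetical, not from the repo):

```python
defaults = {'BATCH_SIZE': 64, 'NUM_COPY': 8}
overrides = {'BATCH_SIZE': 16}

# Python 2 spellings removed in Python 3:
#   for k, v in overrides.iteritems(): ...
#   if defaults.has_key(k): ...
for k, v in overrides.items():  # works on both Python 2 and 3
    if k in defaults:           # membership test replaces has_key()
        defaults[k] = v

print(defaults)  # {'BATCH_SIZE': 16, 'NUM_COPY': 8}
```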
diff --git a/misc/custom_ops.py b/misc/custom_ops.py
index 11b48e8..458edd8 100644
--- a/misc/custom_ops.py
+++ b/misc/custom_ops.py
@@ -5,132 +5,102 @@
 from __future__ import division
 from __future__ import print_function
 
-import prettytensor as pt
-from tensorflow.python.training import moving_averages
 import tensorflow as tf
-from prettytensor.pretty_tensor_class import Phase
 import numpy as np
 
 
-class conv_batch_norm(pt.VarStoreMethod):
-    """Code modification of:
-    http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
-    and
-    https://github.com/tensorflow/models/blob/master/inception/inception/slim/ops.py"""
-
-    def __call__(self, input_layer, epsilon=1e-5, decay=0.9, name="batch_norm",
-                 in_dim=None, phase=Phase.train):
-        shape = input_layer.shape
-        shp = in_dim or shape[-1]
-        with tf.variable_scope(name) as scope:
-            self.mean = self.variable('mean', [shp], init=tf.constant_initializer(0.), train=False)
-            self.variance = self.variable('variance', [shp], init=tf.constant_initializer(1.0), train=False)
-
-            self.gamma = self.variable("gamma", [shp], init=tf.random_normal_initializer(1., 0.02))
-            self.beta = self.variable("beta", [shp], init=tf.constant_initializer(0.))
-
-            if phase == Phase.train:
-                mean, variance = tf.nn.moments(input_layer.tensor, [0, 1, 2])
-                mean.set_shape((shp,))
-                variance.set_shape((shp,))
-
-                update_moving_mean = moving_averages.assign_moving_average(self.mean, mean, decay)
-                update_moving_variance = moving_averages.assign_moving_average(self.variance, variance, decay)
-
-                with tf.control_dependencies([update_moving_mean, update_moving_variance]):
-                    normalized_x = tf.nn.batch_norm_with_global_normalization(
-                        input_layer.tensor, mean, variance, self.beta, self.gamma, epsilon,
-                        scale_after_normalization=True)
-            else:
-                normalized_x = tf.nn.batch_norm_with_global_normalization(
-                    input_layer.tensor, self.mean, self.variance,
-                    self.beta, self.gamma, epsilon,
-                    scale_after_normalization=True)
-            return input_layer.with_tensor(normalized_x, parameters=self.vars)
-
-
-pt.Register(assign_defaults=('phase'))(conv_batch_norm)
-
-
-@pt.Register(assign_defaults=('phase'))
-class fc_batch_norm(conv_batch_norm):
-    def __call__(self, input_layer, *args, **kwargs):
-        ori_shape = input_layer.shape
-        if ori_shape[0] is None:
-            ori_shape[0] = -1
-        new_shape = [ori_shape[0], 1, 1, ori_shape[1]]
-        x = tf.reshape(input_layer.tensor, new_shape)
-        normalized_x = super(self.__class__, self).__call__(input_layer.with_tensor(x), *args, **kwargs)  # input_layer)
-        return normalized_x.reshape(ori_shape)
-
-
-def leaky_rectify(x, leakiness=0.01):
-    assert leakiness <= 1
-    ret = tf.maximum(x, leakiness * x)
-    # import ipdb; ipdb.set_trace()
-    return ret
-
-
-@pt.Register
-class custom_conv2d(pt.VarStoreMethod):
-    def __call__(self, input_layer, output_dim,
-                 k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, in_dim=None, padding='SAME',
-                 name="conv2d"):
-        with tf.variable_scope(name):
-            w = self.variable('w', [k_h, k_w, in_dim or input_layer.shape[-1], output_dim],
-                              init=tf.truncated_normal_initializer(stddev=stddev))
-            conv = tf.nn.conv2d(input_layer.tensor, w, strides=[1, d_h, d_w, 1], padding=padding)
-
-            # biases = self.variable('biases', [output_dim], init=tf.constant_initializer(0.0))
-            # import ipdb; ipdb.set_trace()
-            # return input_layer.with_tensor(tf.nn.bias_add(conv, biases), parameters=self.vars)
-            return input_layer.with_tensor(conv, parameters=self.vars)
name="deconv2d"): - output_shape[0] = input_layer.shape[0] - ts_output_shape = tf.pack(output_shape) - with tf.variable_scope(name): - # filter : [height, width, output_channels, in_channels] - w = self.variable('w', [k_h, k_w, output_shape[-1], input_layer.shape[-1]], - init=tf.random_normal_initializer(stddev=stddev)) - - try: - deconv = tf.nn.conv2d_transpose(input_layer, w, - output_shape=ts_output_shape, - strides=[1, d_h, d_w, 1]) - - # Support for versions of TensorFlow before 0.7.0 - except AttributeError: - deconv = tf.nn.deconv2d(input_layer, w, output_shape=ts_output_shape, - strides=[1, d_h, d_w, 1]) - - # biases = self.variable('biases', [output_shape[-1]], init=tf.constant_initializer(0.0)) - # deconv = tf.reshape(tf.nn.bias_add(deconv, biases), [-1] + output_shape[1:]) - deconv = tf.reshape(deconv, [-1] + output_shape[1:]) - - return deconv - - -@pt.Register -class custom_fully_connected(pt.VarStoreMethod): - def __call__(self, input_layer, output_size, scope=None, in_dim=None, stddev=0.02, bias_start=0.0): - shape = input_layer.shape - input_ = input_layer.tensor - try: - if len(shape) == 4: - input_ = tf.reshape(input_, tf.pack([tf.shape(input_)[0], np.prod(shape[1:])])) - input_.set_shape([None, np.prod(shape[1:])]) - shape = input_.get_shape().as_list() - - with tf.variable_scope(scope or "Linear"): - matrix = self.variable("Matrix", [in_dim or shape[1], output_size], dt=tf.float32, - init=tf.random_normal_initializer(stddev=stddev)) - bias = self.variable("bias", [output_size], init=tf.constant_initializer(bias_start)) - return input_layer.with_tensor(tf.matmul(input_, matrix) + bias, parameters=self.vars) - except Exception: - import ipdb; ipdb.set_trace() +def fc(inputs, num_out, name, activation_fn=None, reuse=None): + shape = inputs.get_shape() + if len(shape) == 4: + inputs = tf.reshape(inputs, tf.stack([tf.shape(inputs)[0], np.prod(shape[1:])])) + inputs.set_shape([None, np.prod(shape[1:])]) + + w_init = tf.random_normal_initializer(stddev=0.02) + + return tf.contrib.layers.fully_connected(inputs, num_out, activation_fn=activation_fn, weights_initializer=w_init, + reuse=reuse, scope=name) + + +def concat(inputs, axis): + return tf.concat(values=inputs, axis=axis) + + +def conv_batch_normalization(inputs, name, epsilon=1e-5, is_training=True, activation_fn=None, reuse=None): + return tf.contrib.layers.batch_norm(inputs, decay=0.9, center=True, scale=True, epsilon=epsilon, + activation_fn=activation_fn, + param_initializers={'beta': tf.constant_initializer(0.), + 'gamma': tf.random_normal_initializer(1., 0.02)}, + reuse=reuse, is_training=is_training, scope=name) + + +def fc_batch_normalization(inputs, name, epsilon=1e-5, is_training=True, activation_fn=None, reuse=None): + ori_shape = inputs.get_shape() + if ori_shape[0] is None: + ori_shape = -1 + new_shape = [ori_shape[0], 1, 1, ori_shape[1]] + x = tf.reshape(inputs, new_shape) + normalized_x = conv_batch_normalization(x, name, epsilon=epsilon, is_training=is_training, + activation_fn=activation_fn, reuse=reuse) + return tf.reshape(normalized_x, ori_shape) + + +def reshape(inputs, shape, name): + return tf.reshape(inputs, shape, name) + + +def Conv2d(inputs, k_h, k_w, c_o, s_h, s_w, name, activation_fn=None, reuse=None, padding='SAME', biased=False): + c_i = inputs.get_shape()[-1] + w_init = tf.random_normal_initializer(stddev=0.02) + + convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) + with tf.variable_scope(name, reuse=reuse) as scope: + kernel = tf.get_variable(name='weights', 
+
+
+def Conv2d(inputs, k_h, k_w, c_o, s_h, s_w, name, activation_fn=None, reuse=None, padding='SAME', biased=False):
+    c_i = inputs.get_shape()[-1]
+    w_init = tf.random_normal_initializer(stddev=0.02)
+
+    convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
+    with tf.variable_scope(name, reuse=reuse) as scope:
+        kernel = tf.get_variable(name='weights', shape=[k_h, k_w, c_i, c_o], initializer=w_init)
+        output = convolve(inputs, kernel)
+
+        if biased:
+            biases = tf.get_variable(name='biases', shape=[c_o])
+            output = tf.nn.bias_add(output, biases)
+        if activation_fn is not None:
+            output = activation_fn(output, name=scope.name)
+
+        return output
+
+
+def Deconv2d(inputs, output_shape, name, k_h, k_w, s_h=2, s_w=2, reuse=None, activation_fn=None, biased=False):
+    output_shape[0] = inputs.get_shape()[0]
+    ts_output_shape = tf.stack(output_shape)
+    w_init = tf.random_normal_initializer(stddev=0.02)
+
+    deconvolve = lambda i, k: tf.nn.conv2d_transpose(i, k, output_shape=ts_output_shape, strides=[1, s_h, s_w, 1])
+    with tf.variable_scope(name, reuse=reuse) as scope:
+        kernel = tf.get_variable(name='weights', shape=[k_h, k_w, output_shape[-1], inputs.get_shape()[-1]],
+                                 initializer=w_init)
+        output = deconvolve(inputs, kernel)
+
+        if biased:
+            biases = tf.get_variable(name='biases', shape=[output_shape[-1]])
+            output = tf.nn.bias_add(output, biases)
+        if activation_fn is not None:
+            output = activation_fn(output, name=scope.name)
+
+        deconv = tf.reshape(output, [-1] + output_shape[1:])
+
+        return deconv
+
+
+def add(inputs, name):
+    return tf.add_n(inputs, name=name)
+
+
+def UpSample(inputs, size, method, align_corners, name):
+    return tf.image.resize_images(inputs, size, method, align_corners)
+
+
+def flatten(inputs, name):
+    input_shape = inputs.get_shape()
+    dim = 1
+    for d in input_shape[1:].as_list():
+        dim *= d
+    inputs = tf.reshape(inputs, [-1, dim])
+
+    return inputs
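The functions above replace the prettytensor templates with plain TensorFlow, so composition becomes explicit function application instead of method chaining. A minimal sketch of how the new wrappers are meant to be combined (TF 1.x; the input shape and layer names are illustrative only):

```python
import tensorflow as tf
# from custom_ops import Conv2d, conv_batch_normalization, fc  # with misc/ on sys.path

x = tf.placeholder(tf.float32, [None, 64, 64, 3])

# prettytensor style (removed):
#   pt.wrap(x).custom_conv2d(32, k_h=3, k_w=3, d_h=1, d_w=1).conv_batch_norm().apply(tf.nn.relu)
h = Conv2d(x, 3, 3, 32, 1, 1, name='conv1')               # 3x3 conv, 32 filters, stride 1x1
h = conv_batch_normalization(h, 'conv1/bn', is_training=True,
                             activation_fn=tf.nn.relu)    # batch norm followed by ReLU
out = fc(h, 1, 'out')                                     # fc() flattens 4-D inputs internally
```

Note that `conv_batch_normalization` takes `is_training` explicitly — this is what replaces prettytensor's `Phase.train`/`Phase.test` scoping, and it is why the generators below now accept a `training` argument.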
diff --git a/misc/datasets.py b/misc/datasets.py
index 624e6af..cd7bcee 100644
--- a/misc/datasets.py
+++ b/misc/datasets.py
@@ -1,16 +1,13 @@
 from __future__ import division
 from __future__ import print_function
 
-
 import numpy as np
 import pickle
 import random
 
 
 class Dataset(object):
-    def __init__(self, images, imsize, embeddings=None,
-                 filenames=None, workdir=None,
-                 labels=None, aug_flag=True,
+    def __init__(self, images, imsize, embeddings=None, filenames=None, workdir=None, labels=None, aug_flag=True,
                  class_id=None, class_range=None):
         self._images = images
         self._embeddings = embeddings
@@ -59,8 +56,7 @@ def readCaptions(self, filenames, class_id):
         if name.find('jpg/') != -1:  # flowers dataset
             class_name = 'class_%05d/' % class_id
             name = name.replace('jpg/', class_name)
-        cap_path = '%s/text_c10/%s.txt' %\
-                   (self.workdir, name)
+        cap_path = '%s/text_c10/%s.txt' % (self.workdir, name)
         with open(cap_path, "r") as f:
             captions = f.read().split('\n')
         captions = [cap for cap in captions if len(cap) > 0]
@@ -68,14 +64,13 @@ def readCaptions(self, filenames, class_id):
 
     def transform(self, images):
         if self._aug_flag:
-            transformed_images =\
-                np.zeros([images.shape[0], self._imsize, self._imsize, 3])
+            transformed_images = np.zeros([images.shape[0], self._imsize, self._imsize, 3])
             ori_size = images.shape[1]
             for i in range(images.shape[0]):
                 h1 = np.floor((ori_size - self._imsize) * np.random.random())
                 w1 = np.floor((ori_size - self._imsize) * np.random.random())
-                cropped_image =\
-                    images[i][w1: w1 + self._imsize, h1: h1 + self._imsize, :]
+                cropped_image = images[i][int(w1): int(w1 + self._imsize), int(h1): int(h1 + self._imsize), :]
+                # cropped_image = images[i][w1: w1 + self._imsize, h1: h1 + self._imsize, :]
                 if random.random() > 0.5:
                     transformed_images[i] = np.fliplr(cropped_image)
                 else:
@@ -93,12 +88,10 @@ def sample_embeddings(self, embeddings, filenames, class_id, sample_num):
             sampled_embeddings = []
             sampled_captions = []
             for i in range(batch_size):
-                randix = np.random.choice(embedding_num,
-                                          sample_num, replace=False)
+                randix = np.random.choice(embedding_num, sample_num, replace=False)
                 if sample_num == 1:
                     randix = int(randix)
-                    captions = self.readCaptions(filenames[i],
-                                                 class_id[i])
+                    captions = self.readCaptions(filenames[i], class_id[i])
                     sampled_captions.append(captions[randix])
                     sampled_embeddings.append(embeddings[i, randix, :])
                 else:
@@ -128,11 +121,8 @@ def next_batch(self, batch_size, window):
         current_ids = self._perm[start:end]
         fake_ids = np.random.randint(self._num_examples, size=batch_size)
 
-        collision_flag =\
-            (self._class_id[current_ids] == self._class_id[fake_ids])
-        fake_ids[collision_flag] =\
-            (fake_ids[collision_flag] +
-             np.random.randint(100, 200)) % self._num_examples
+        collision_flag = (self._class_id[current_ids] == self._class_id[fake_ids])
+        fake_ids[collision_flag] = (fake_ids[collision_flag] + np.random.randint(100, 200)) % self._num_examples
 
         sampled_images = self._images[current_ids]
         sampled_wrong_images = self._images[fake_ids, :, :, :]
@@ -148,9 +138,8 @@ def next_batch(self, batch_size, window):
         if self._embeddings is not None:
             filenames = [self._filenames[i] for i in current_ids]
             class_id = [self._class_id[i] for i in current_ids]
-            sampled_embeddings, sampled_captions = \
-                self.sample_embeddings(self._embeddings[current_ids],
-                                       filenames, class_id, window)
+            sampled_embeddings, sampled_captions = self.sample_embeddings(self._embeddings[current_ids], filenames,
+                                                                          class_id, window)
             ret_list.append(sampled_embeddings)
             ret_list.append(sampled_captions)
         else:
@@ -185,8 +174,7 @@ def next_batch_test(self, batch_size, start, max_captions):
         sampled_filenames = self._filenames[start:end]
         sampled_class_id = self._class_id[start:end]
         for i in range(len(sampled_filenames)):
-            captions = self.readCaptions(sampled_filenames[i],
-                                         sampled_class_id[i])
+            captions = self.readCaptions(sampled_filenames[i], sampled_class_id[i])
             # print(captions)
             sampled_captions.append(captions)
 
@@ -194,8 +182,7 @@ def next_batch_test(self, batch_size, start, max_captions):
             batch = sampled_embeddings[:, i, :]
             sampled_embeddings_batchs.append(np.squeeze(batch))
 
-        return [sampled_images, sampled_embeddings_batchs,
-                self._saveIDs[start:end], sampled_captions]
+        return [sampled_images, sampled_embeddings_batchs, self._saveIDs[start:end], sampled_captions]
 
 
 class TextDataset(object):
@@ -207,8 +194,7 @@ def __init__(self, workdir, embedding_type, hr_lr_ratio):
         elif self.hr_lr_ratio == 4:
             self.image_filename = '/304images.pickle'
 
-        self.image_shape = [lr_imsize * self.hr_lr_ratio,
-                            lr_imsize * self.hr_lr_ratio, 3]
+        self.image_shape = [lr_imsize * self.hr_lr_ratio, lr_imsize * self.hr_lr_ratio, 3]
         self.image_dim = self.image_shape[0] * self.image_shape[1] * 3
         self.embedding_shape = None
         self.train = None
@@ -226,7 +212,7 @@ def get_data(self, pickle_path, aug_flag=True):
             print('images: ', images.shape)
 
         with open(pickle_path + self.embedding_filename, 'rb') as f:
-            embeddings = pickle.load(f)
+            embeddings = pickle.load(f, encoding="latin-1")
             embeddings = np.array(embeddings)
             self.embedding_shape = [embeddings.shape[-1]]
             print('embeddings: ', embeddings.shape)
@@ -234,8 +220,6 @@ def get_data(self, pickle_path, aug_flag=True):
             list_filenames = pickle.load(f)
             print('list_filenames: ', len(list_filenames), list_filenames[0])
         with open(pickle_path + '/class_info.pickle', 'rb') as f:
-            class_id = pickle.load(f)
+            class_id = pickle.load(f, encoding="latin-1")
 
-        return Dataset(images, self.image_shape[0], embeddings,
-                       list_filenames, self.workdir, None,
-                       aug_flag, class_id)
+        return Dataset(images, self.image_shape[0], embeddings, list_filenames, self.workdir, None, aug_flag, class_id)
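The `encoding="latin-1"` arguments are the standard recipe for reading Python 2 pickles (here, ones containing NumPy arrays) under Python 3: the default `'ASCII'` codec fails on the embedded byte strings, while latin-1 maps all 256 byte values one-to-one. A minimal sketch (file path illustrative):

```python
import pickle

with open('Data/birds/train/embeddings.pickle', 'rb') as f:
    # Without encoding='latin-1', Python 3 raises UnicodeDecodeError on
    # pickles produced by Python 2 that contain numpy arrays or byte strings.
    embeddings = pickle.load(f, encoding='latin-1')
```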
diff --git a/misc/preprocess_birds.py b/misc/preprocess_birds.py
index c93736a..717f703 100644
--- a/misc/preprocess_birds.py
+++ b/misc/preprocess_birds.py
@@ -7,10 +7,12 @@
 import numpy as np
 import os
 import pickle
-from misc.utils import get_image
-import scipy.misc
+from utils import get_image
 import pandas as pd
 
+# > Python3
+from skimage.transform import resize
+
 # from glob import glob
 
 # TODO: 1. current label is temporary, need to change according to real label
@@ -43,7 +45,7 @@ def load_bbox(data_dir):
     # filename_bbox = {img_file[:-4]: [] for img_file in filenames}
     numImgs = len(filenames)
-    for i in xrange(0, numImgs):
+    for i in range(numImgs):
         # bbox = [x-left, y-top, width, height]
         bbox = df_bounding_boxes.iloc[i][1:].tolist()
@@ -64,7 +66,8 @@ def save_data_list(inpath, outpath, filenames, filename_bbox):
         img = get_image(f_name, LOAD_SIZE, is_crop=True, bbox=bbox)
         img = img.astype('uint8')
         hr_images.append(img)
-        lr_img = scipy.misc.imresize(img, [lr_size, lr_size], 'bicubic')
+        lr_img = resize(img, [lr_size, lr_size], order=3, preserve_range=True).astype('uint8')
+        # lr_img = scipy.misc.imresize(img, [lr_size, lr_size], 'bicubic')
         lr_images.append(lr_img)
         cnt += 1
         if cnt % 100 == 0:
diff --git a/misc/preprocess_flowers.py b/misc/preprocess_flowers.py
index e6f9789..b273c4e 100644
--- a/misc/preprocess_flowers.py
+++ b/misc/preprocess_flowers.py
@@ -7,9 +7,12 @@
 import numpy as np
 import os
 import pickle
-from misc.utils import get_image
+from utils import get_image
 import scipy.misc
 
+# > Python3
+from skimage.transform import resize
+
 # from glob import glob
 
 # TODO: 1. current label is temporary, need to change according to real label
@@ -39,7 +42,7 @@ def save_data_list(inpath, outpath, filenames):
         img = get_image(f_name, LOAD_SIZE, is_crop=False)
         img = img.astype('uint8')
         hr_images.append(img)
-        lr_img = scipy.misc.imresize(img, [lr_size, lr_size], 'bicubic')
+        lr_img = resize(img, [lr_size, lr_size], order=3, preserve_range=True).astype('uint8')
         lr_images.append(lr_img)
         cnt += 1
         if cnt % 100 == 0:
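A pitfall behind the `scipy.misc.imresize` → `skimage.transform.resize` migration above: `imresize` returned a `uint8` array in [0, 255], whereas `resize` converts integer input to `float64` in [0, 1] unless told otherwise. Passing `preserve_range=True` and casting back, as done above, keeps the pickled low-resolution images byte-compatible with what the old code produced (`order=3` is bicubic interpolation, matching the old `'bicubic'` flag). A self-contained check:

```python
import numpy as np
from skimage.transform import resize

img = np.random.randint(0, 256, (304, 304, 3), dtype=np.uint8)

default = resize(img, (76, 76), order=3)                    # float64, values in [0, 1]
kept = resize(img, (76, 76), order=3, preserve_range=True)  # float64, values in [0, 255]

print(default.max() <= 1.0)    # True
print(kept.max() > 1.0)        # True
lr_img = kept.astype('uint8')  # same dtype/range as the old imresize output
```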
diff --git a/misc/skipthoughts.py b/misc/skipthoughts.py
index d8be946..6a00f45 100644
--- a/misc/skipthoughts.py
+++ b/misc/skipthoughts.py
@@ -2,14 +2,14 @@
 Skip-thought vectors
 https://github.com/ryankiros/skip-thoughts
 '''
-import os
+# import os
+import warnings
 
 import theano
 import theano.tensor as tensor
 
-import cPickle as pkl
+import pickle as pkl
 import numpy
-import copy
 import nltk
 
 from collections import OrderedDict, defaultdict
@@ -34,10 +34,10 @@ def load_model():
     Load the model with saved tables
     """
     # Load model options
-    print 'Loading model parameters...'
-    with open('%s.pkl'%path_to_umodel, 'rb') as f:
+    print('Loading model parameters...')
+    with open('%s.pkl' % path_to_umodel, 'rb') as f:
         uoptions = pkl.load(f)
-    with open('%s.pkl'%path_to_bmodel, 'rb') as f:
+    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
         boptions = pkl.load(f)
 
     # Load parameters
@@ -49,19 +49,19 @@ def load_model():
     btparams = init_tparams(bparams)
 
     # Extractor functions
-    print 'Compiling encoders...'
+    print('Compiling encoders...')
     embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
     f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
     embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
     f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')
 
     # Tables
-    print 'Loading tables...'
+    print('Loading tables...')
     utable, btable = load_tables()
 
     # Store everything we need in a dictionary
-    print 'Packing up...'
-    model = {}
+    print('Packing up...')
+    model = dict()
     model['uoptions'] = uoptions
     model['boptions'] = boptions
     model['utable'] = utable
@@ -96,7 +96,7 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False)
     X = preprocess(X)
 
     # word dictionary and init
-    d = defaultdict(lambda : 0)
+    d = defaultdict(lambda: 0)
     for w in model['utable'].keys():
         d[w] = 1
     ufeatures = numpy.zeros((len(X), model['uoptions']['dim']), dtype='float32')
@@ -105,14 +105,14 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False)
     # length dictionary
     ds = defaultdict(list)
     captions = [s.split() for s in X]
-    for i,s in enumerate(captions):
+    for i, s in enumerate(captions):
         ds[len(s)].append(i)
 
     # Get features. This encodes by length, in order to avoid wasting computation
     for k in ds.keys():
         if verbose:
-            print k
-        numbatches = len(ds[k]) / batch_size + 1
+            print(k)
+        numbatches = len(ds[k]) // batch_size + 1
         for minibatch in range(numbatches):
             caps = ds[k][minibatch::numbatches]
 
@@ -126,20 +126,20 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False)
                 caption = captions[c]
                 for j in range(len(caption)):
                     if d[caption[j]] > 0:
-                        uembedding[j,ind] = model['utable'][caption[j]]
-                        bembedding[j,ind] = model['btable'][caption[j]]
+                        uembedding[j, ind] = model['utable'][caption[j]]
+                        bembedding[j, ind] = model['btable'][caption[j]]
                     else:
-                        uembedding[j,ind] = model['utable']['UNK']
-                        bembedding[j,ind] = model['btable']['UNK']
+                        uembedding[j, ind] = model['utable']['UNK']
+                        bembedding[j, ind] = model['btable']['UNK']
                 if use_eos:
-                    uembedding[-1,ind] = model['utable']['<eos>']
-                    bembedding[-1,ind] = model['btable']['<eos>']
+                    uembedding[-1, ind] = model['utable']['<eos>']
+                    bembedding[-1, ind] = model['btable']['<eos>']
                 if use_eos:
-                    uff = model['f_w2v'](uembedding, numpy.ones((len(caption)+1,len(caps)), dtype='float32'))
-                    bff = model['f_w2v2'](bembedding, numpy.ones((len(caption)+1,len(caps)), dtype='float32'))
+                    uff = model['f_w2v'](uembedding, numpy.ones((len(caption)+1, len(caps)), dtype='float32'))
+                    bff = model['f_w2v2'](bembedding, numpy.ones((len(caption)+1, len(caps)), dtype='float32'))
                 else:
-                    uff = model['f_w2v'](uembedding, numpy.ones((len(caption),len(caps)), dtype='float32'))
-                    bff = model['f_w2v2'](bembedding, numpy.ones((len(caption),len(caps)), dtype='float32'))
+                    uff = model['f_w2v'](uembedding, numpy.ones((len(caption), len(caps)), dtype='float32'))
+                    bff = model['f_w2v2'](bembedding, numpy.ones((len(caption), len(caps)), dtype='float32'))
                 if use_norm:
                     for j in range(len(uff)):
                         uff[j] /= norm(uff[j])
@@ -180,10 +180,10 @@ def nn(model, text, vectors, query, k=5):
     scores = numpy.dot(qf, vectors.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     sentences = [text[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, s in enumerate(sentences):
-        print s, sorted_args[i]
+        print(s, sorted_args[i])
 
 
 def word_features(table):
@@ -207,17 +207,17 @@ def nn_words(table, wordvecs, query, k=10):
     scores = numpy.dot(qf, wordvecs.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     words = [keys[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, w in enumerate(words):
-        print w
+        print(w)
 
 
 def _p(pp, name):
     """
     make prefix-appended name
     """
-    return '%s_%s'%(pp, name)
+    return '%s_%s' % (pp, name)
 
 
 def init_tparams(params):
     """
     initialize Theano shared variables according to the initial parameters
     """
     tparams = OrderedDict()
-    for kk, pp in params.iteritems():
+    for kk, pp in params.items():
         tparams[kk] = theano.shared(params[kk], name=kk)
     return tparams
 
@@ -235,9 +235,9 @@ def load_params(path, params):
     """
     load parameters
     """
     pp = numpy.load(path)
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         if kk not in pp:
-            warnings.warn('%s is not in the archive'%kk)
+            warnings.warn('%s is not in the archive' % kk)
             continue
         params[kk] = pp[kk]
     return params
@@ -246,6 +246,7 @@ def load_params(path, params):
 # layers: 'name': ('parameter initializer', 'feedforward')
 layers = {'gru': ('param_init_gru', 'gru_layer')}
 
+
 def get_layer(name):
     fns = layers[name]
     return (eval(fns[0]), eval(fns[1]))
@@ -261,8 +262,8 @@ def init_params(options):
     params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])
 
     # encoder: GRU
-    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
-                                              nin=options['dim_word'], dim=options['dim'])
+    params = get_layer(options['encoder'])[0](options, params, prefix='encoder', nin=options['dim_word'],
+                                              dim=options['dim'])
     return params
 
@@ -276,10 +277,10 @@ def init_params_bi(options):
     params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])
 
     # encoder: GRU
-    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
-                                              nin=options['dim_word'], dim=options['dim'])
-    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_r',
-                                              nin=options['dim_word'], dim=options['dim'])
+    params = get_layer(options['encoder'])[0](options, params, prefix='encoder', nin=options['dim_word'],
+                                              dim=options['dim'])
+    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_r', nin=options['dim_word'],
+                                              dim=options['dim'])
     return params
 
@@ -292,9 +293,7 @@ def build_encoder(tparams, options):
     x_mask = tensor.matrix('x_mask', dtype='float32')
 
     # encoder
-    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
-                                            prefix='encoder',
-                                            mask=x_mask)
+    proj = get_layer(options['encoder'])[1](tparams, embedding, options, prefix='encoder', mask=x_mask)
     ctx = proj[0][-1]
 
     return embedding, x_mask, ctx
@@ -311,12 +310,8 @@ def build_encoder_bi(tparams, options):
     xr_mask = x_mask[::-1]
 
     # encoder
-    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
-                                            prefix='encoder',
-                                            mask=x_mask)
-    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options,
-                                             prefix='encoder_r',
-                                             mask=xr_mask)
+    proj = get_layer(options['encoder'])[1](tparams, embedding, options, prefix='encoder', mask=x_mask)
+    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options, prefix='encoder_r', mask=xr_mask)
 
     ctx = tensor.concatenate([proj[0][-1], projr[0][-1]], axis=1)
 
@@ -330,10 +325,10 @@ def ortho_weight(ndim):
     return u.astype('float32')
 
 
-def norm_weight(nin,nout=None, scale=0.1, ortho=True):
-    if nout == None:
+def norm_weight(nin, nout=None, scale=0.1, ortho=True):
+    if nout is None:
         nout = nin
     if nout == nin and ortho:
         W = ortho_weight(nin)
     else:
         W = numpy.random.uniform(low=-scale, high=scale, size=(nin, nout))
@@ -344,23 +339,21 @@ def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
     """
     parameter init for GRU
     """
-    if nin == None:
+    if nin is None:
         nin = options['dim_proj']
-    if dim == None:
+    if dim is None:
         dim = options['dim_proj']
-    W = numpy.concatenate([norm_weight(nin,dim),
-                           norm_weight(nin,dim)], axis=1)
-    params[_p(prefix,'W')] = W
-    params[_p(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')
-    U = numpy.concatenate([ortho_weight(dim),
-                           ortho_weight(dim)], axis=1)
-    params[_p(prefix,'U')] = U
+    W = numpy.concatenate([norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
+    params[_p(prefix, 'W')] = W
+    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
+    U = numpy.concatenate([ortho_weight(dim), ortho_weight(dim)], axis=1)
+    params[_p(prefix, 'U')] = U
 
     Wx = norm_weight(nin, dim)
-    params[_p(prefix,'Wx')] = Wx
+    params[_p(prefix, 'Wx')] = Wx
     Ux = ortho_weight(dim)
-    params[_p(prefix,'Ux')] = Ux
-    params[_p(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')
+    params[_p(prefix, 'Ux')] = Ux
+    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')
 
     return params
 
@@ -375,9 +368,9 @@ def gru_layer(tparams, state_below, options, prefix='gru', mask=None, **kwargs):
     else:
         n_samples = 1
 
-    dim = tparams[_p(prefix,'Ux')].shape[1]
+    dim = tparams[_p(prefix, 'Ux')].shape[1]
 
-    if mask == None:
+    if mask is None:
         mask = tensor.alloc(1., state_below.shape[0], 1)
 
     def _slice(_x, n, dim):
@@ -404,21 +397,18 @@ def _step_slice(m_, x_, xx_, h_, U, Ux):
         h = tensor.tanh(preactx)
 
         h = u * h_ + (1. - u) * h
-        h = m_[:,None] * h + (1. - m_)[:,None] * h_
+        h = m_[:, None] * h + (1. - m_)[:, None] * h_
 
         return h
 
     seqs = [mask, state_below_, state_belowx]
     _step = _step_slice
 
-    rval, updates = theano.scan(_step,
-                                sequences=seqs,
-                                outputs_info = [tensor.alloc(0., n_samples, dim)],
-                                non_sequences = [tparams[_p(prefix, 'U')],
-                                                 tparams[_p(prefix, 'Ux')]],
+    rval, updates = theano.scan(_step, sequences=seqs, outputs_info=[tensor.alloc(0., n_samples, dim)],
+                                non_sequences=[tparams[_p(prefix, 'U')], tparams[_p(prefix, 'Ux')]],
                                 name=_p(prefix, '_layers'),
                                 n_steps=nsteps,
                                 profile=profile,
                                 strict=True)
     rval = [rval]
     return rval
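Two Python 3 semantics changes in this file are easy to miss among the `print` conversions. First, `/` between ints is now true division, so the batch count needs `//`. Second, `is` tests object identity and is only reliable against singletons like `None`; comparisons such as `nout == nin` above must stay `==`. A quick illustration:

```python
batch_size = 128
n = 300

print(n / batch_size)       # 2.34375 in Python 3 (Python 2 gave 2)
print(n // batch_size + 1)  # 3 -> the numbatches fix above

a = int('1024')             # constructed at runtime
b = 1024
print(a == b, a is b)       # True False: equal ints need not be identical objects
```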
diff --git a/misc/tf_upgrade.py b/misc/tf_upgrade.py
new file mode 100644
index 0000000..e0a8dcd
--- /dev/null
+++ b/misc/tf_upgrade.py
@@ -0,0 +1,255 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Upgrader for Python scripts from pre-1.0 TensorFlow to 1.0 TensorFlow."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+
+from tensorflow.tools.compatibility import ast_edits
+
+
+class TFAPIChangeSpec(ast_edits.APIChangeSpec):
+  """List of maps that describe what changed in the API."""
+
+  def __init__(self):
+    # Maps from a function name to a dictionary that describes how to
+    # map from an old argument keyword to the new argument keyword.
+    self.function_keyword_renames = {
+        "tf.batch_matmul": {
+            "adj_x": "adjoint_a",
+            "adj_y": "adjoint_b",
+        },
+        "tf.count_nonzero": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_all": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_any": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_max": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_mean": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_min": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_prod": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_sum": {
+            "reduction_indices": "axis"
+        },
+        "tf.reduce_logsumexp": {
+            "reduction_indices": "axis"
+        },
+        "tf.expand_dims": {
+            "dim": "axis"
+        },
+        "tf.argmax": {
+            "dimension": "axis"
+        },
+        "tf.argmin": {
+            "dimension": "axis"
+        },
+        "tf.reduce_join": {
+            "reduction_indices": "axis"
+        },
+        "tf.sparse_concat": {
+            "concat_dim": "axis"
+        },
+        "tf.sparse_split": {
+            "split_dim": "axis"
+        },
+        "tf.sparse_reduce_sum": {
+            "reduction_axes": "axis"
+        },
+        "tf.reverse_sequence": {
+            "seq_dim": "seq_axis",
+            "batch_dim": "batch_axis"
+        },
+        "tf.sparse_reduce_sum_sparse": {
+            "reduction_axes": "axis"
+        },
+        "tf.squeeze": {
+            "squeeze_dims": "axis"
+        },
+        "tf.split": {
+            "split_dim": "axis",
+            "num_split": "num_or_size_splits"
+        },
+        "tf.concat": {
+            "concat_dim": "axis"
+        },
+    }
+
+    # Mapping from function to the new name of the function
+    self.symbol_renames = {
+        "tf.inv": "tf.reciprocal",
+        "tf.contrib.deprecated.scalar_summary": "tf.summary.scalar",
+        "tf.contrib.deprecated.histogram_summary": "tf.summary.histogram",
+        "tf.listdiff": "tf.setdiff1d",
+        "tf.list_diff": "tf.setdiff1d",
+        "tf.mul": "tf.multiply",
+        "tf.neg": "tf.negative",
+        "tf.sub": "tf.subtract",
+        "tf.train.SummaryWriter": "tf.summary.FileWriter",
+        "tf.scalar_summary": "tf.summary.scalar",
+        "tf.histogram_summary": "tf.summary.histogram",
+        "tf.audio_summary": "tf.summary.audio",
+        "tf.image_summary": "tf.summary.image",
+        "tf.merge_summary": "tf.summary.merge",
+        "tf.merge_all_summaries": "tf.summary.merge_all",
+        "tf.image.per_image_whitening": "tf.image.per_image_standardization",
+        "tf.all_variables": "tf.global_variables",
+        "tf.VARIABLES": "tf.GLOBAL_VARIABLES",
+        "tf.initialize_all_variables": "tf.global_variables_initializer",
+        "tf.initialize_variables": "tf.variables_initializer",
+        "tf.initialize_local_variables": "tf.local_variables_initializer",
+        "tf.batch_matrix_diag": "tf.matrix_diag",
+        "tf.batch_band_part": "tf.band_part",
+        "tf.batch_set_diag": "tf.set_diag",
+        "tf.batch_matrix_transpose": "tf.matrix_transpose",
+        "tf.batch_matrix_determinant": "tf.matrix_determinant",
+        "tf.batch_matrix_inverse": "tf.matrix_inverse",
+        "tf.batch_cholesky": "tf.cholesky",
+        "tf.batch_cholesky_solve": "tf.cholesky_solve",
+        "tf.batch_matrix_solve": "tf.matrix_solve",
+        "tf.batch_matrix_triangular_solve": "tf.matrix_triangular_solve",
+        "tf.batch_matrix_solve_ls": "tf.matrix_solve_ls",
+        "tf.batch_self_adjoint_eig": "tf.self_adjoint_eig",
+        "tf.batch_self_adjoint_eigvals": "tf.self_adjoint_eigvals",
+        "tf.batch_svd": "tf.svd",
+        "tf.batch_fft": "tf.fft",
+        "tf.batch_ifft": "tf.ifft",
+        "tf.batch_fft2d": "tf.fft2d",
+        "tf.batch_ifft2d": "tf.ifft2d",
+        "tf.batch_fft3d": "tf.fft3d",
+        "tf.batch_ifft3d": "tf.ifft3d",
+        "tf.select": "tf.where",
+        "tf.complex_abs": "tf.abs",
+        "tf.batch_matmul": "tf.matmul",
+        "tf.pack": "tf.stack",
+        "tf.unpack": "tf.unstack",
+        "tf.op_scope": "tf.name_scope",
+    }
+
+    self.change_to_function = {
+        "tf.ones_initializer",
+        "tf.zeros_initializer",
+    }
+
+    # Functions that were reordered should be changed to the new keyword args
+    # for safety, if positional arguments are used. If you have reversed the
+    # positional arguments yourself, this could do the wrong thing.
+    self.function_reorders = {
+        "tf.split": ["axis", "num_or_size_splits", "value", "name"],
+        "tf.sparse_split": ["axis", "num_or_size_splits", "value", "name"],
+        "tf.concat": ["concat_dim", "values", "name"],
+        "tf.svd": ["tensor", "compute_uv", "full_matrices", "name"],
+        "tf.nn.softmax_cross_entropy_with_logits": [
+            "logits", "labels", "dim", "name"
+        ],
+        "tf.nn.sparse_softmax_cross_entropy_with_logits": [
+            "logits", "labels", "name"
+        ],
+        "tf.nn.sigmoid_cross_entropy_with_logits": ["logits", "labels", "name"],
+        "tf.op_scope": ["values", "name", "default_name"],
+    }
+
+    # Warnings that should be printed if corresponding functions are used.
+    self.function_warnings = {
+        "tf.reverse": (
+            ast_edits.ERROR,
+            "tf.reverse has had its argument semantics changed "
+            "significantly. The converter cannot detect this reliably, so "
+            "you need to inspect this usage manually.\n"),
+    }
+
+    self.module_deprecations = {}
+
+
+if __name__ == "__main__":
+  parser = argparse.ArgumentParser(
+      formatter_class=argparse.RawDescriptionHelpFormatter,
+      description="""Convert a TensorFlow Python file to 1.0
+
+Simple usage:
+  tf_convert.py --infile foo.py --outfile bar.py
+  tf_convert.py --intree ~/code/old --outtree ~/code/new
+""")
+  parser.add_argument(
+      "--infile",
+      dest="input_file",
+      help="If converting a single file, the name of the file "
+      "to convert")
+  parser.add_argument(
+      "--outfile",
+      dest="output_file",
+      help="If converting a single file, the output filename.")
+  parser.add_argument(
+      "--intree",
+      dest="input_tree",
+      help="If converting a whole tree of files, the directory "
+      "to read from (relative or absolute).")
+  parser.add_argument(
+      "--outtree",
+      dest="output_tree",
+      help="If converting a whole tree of files, the output "
+      "directory (relative or absolute).")
+  parser.add_argument(
+      "--copyotherfiles",
+      dest="copy_other_files",
+      help=("If converting a whole tree of files, whether to "
+            "copy the other files."),
+      type=bool,
+      default=False)
+  parser.add_argument(
+      "--reportfile",
+      dest="report_filename",
+      help=("The name of the file where the report log is "
+            "stored."
+            "(default: %(default)s)"),
+      default="report.txt")
+  args = parser.parse_args()
+
+  upgrade = ast_edits.ASTCodeUpgrader(TFAPIChangeSpec())
+  report_text = None
+  report_filename = args.report_filename
+  files_processed = 0
+  if args.input_file:
+    files_processed, report_text, errors = upgrade.process_file(args.input_file, args.output_file)
+    files_processed = 1
+  elif args.input_tree:
+    files_processed, report_text, errors = upgrade.process_tree(args.input_tree, args.output_tree,
+                                                                args.copy_other_files)
+  else:
+    parser.print_help()
+  if report_text:
+    open(report_filename, "w").write(report_text)
+    print("TensorFlow 1.0 Upgrade Script")
+    print("-----------------------------")
+    print("Converted %d files\n" % files_processed)
+    print("Detected %d errors that require attention" % len(errors))
+    print("-" * 80)
+    print("\n".join(errors))
+  print("\nMake sure to read the detailed log %r\n" % report_filename)
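The spec above drives a purely syntactic rewrite of pre-1.0 code. Its practical effect on the patterns used in this repository looks like the following (pre-1.0 calls in comments, 1.0 forms below them; the snippet is illustrative, not output from an actual run of the script):

```python
import tensorflow as tf

a = tf.placeholder(tf.float32, [2, 3])
b = tf.placeholder(tf.float32, [2, 3])

# out = tf.concat(1, [a, b])     # pre-1.0: concat_dim came first
out = tf.concat([a, b], axis=1)  # rewritten via the tf.concat reorder/keyword maps

# s = tf.pack([a, b])            # symbol_renames: tf.pack -> tf.stack
s = tf.stack([a, b])

# m = tf.mul(a, b)               # symbol_renames: tf.mul -> tf.multiply
m = tf.multiply(a, b)
```

Invocation follows the docstring in the argparse description: a single file via `--infile`/`--outfile`, or a whole tree via `--intree`/`--outtree`, with a log written to the `--reportfile` path.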
+ "(default: %(default)s)"), + default="report.txt") + args = parser.parse_args() + + upgrade = ast_edits.ASTCodeUpgrader(TFAPIChangeSpec()) + report_text = None + report_filename = args.report_filename + files_processed = 0 + if args.input_file: + files_processed, report_text, errors = upgrade.process_file(args.input_file, args.output_file) + files_processed = 1 + elif args.input_tree: + files_processed, report_text, errors = upgrade.process_tree(args.input_tree, args.output_tree, + args.copy_other_files) + else: + parser.print_help() + if report_text: + open(report_filename, "w").write(report_text) + print("TensorFlow 1.0 Upgrade Script") + print("-----------------------------") + print("Converted %d files\n" % files_processed) + print("Detected %d errors that require attention" % len(errors)) + print("-" * 80) + print("\n".join(errors)) + print("\nMake sure to read the detailed log %r\n" % report_filename) diff --git a/misc/utils.py b/misc/utils.py index 961954d..540dc72 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -6,9 +6,11 @@ from __future__ import print_function import numpy as np -import scipy.misc import os import errno +import imageio + +from skimage.transform import resize def get_image(image_path, image_size, is_crop=False, bbox=None): @@ -43,14 +45,12 @@ def transform(image, image_size, is_crop, bbox): image = colorize(image) if is_crop: image = custom_crop(image, bbox) - # - transformed_image =\ - scipy.misc.imresize(image, [image_size, image_size], 'bicubic') - return np.array(transformed_image) + transformed_image = resize(image, [image_size, image_size], order=3) + return transformed_image def imread(path): - img = scipy.misc.imread(path) + img = imageio.imread(path) if len(img.shape) == 0: raise ValueError(path + " got loaded as a dimensionless array!") return img.astype(np.float) @@ -65,6 +65,16 @@ def colorize(img): return img +def convert_to_uint8(img): + img = (img + 1.) * (255 / 2.) + img = img.astype(np.uint8) + return img + + +def caption_convert(caption): + return caption.decode("utf-8") + + def mkdir_p(path): try: os.makedirs(path) diff --git a/models/README.md b/models/README.md index a38141b..71ad6cb 100644 --- a/models/README.md +++ b/models/README.md @@ -1,9 +1,9 @@ **Pretrained StackGAN Models** -- [StackGAN for birds]() trained from char-CNN-RNN text embeddings. Download and save it to `models/`. -- [StackGAN for flowers](https://drive.google.com/open?id=0B3y_msrWZaXLX01FMC1JQW9vaFk) trained from char-CNN-RNN text embeddings. Download and save it to `models/`. -- [StackGAN for birds](https://drive.google.com/open?id=0B3y_msrWZaXLZVNRNFg4d055Q1E) trained from skip-thought text embeddings. Download and save it to `models/` (Just use the same setting as the char-CNN-RNN, we assume better results can be achieved by playing with the hyper-parameters). +- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from char-CNN-RNN text embeddings. Download and save it to `models/`. +- [StackGAN for flowers]() trained from char-CNN-RNN text embeddings. Download and save it to `models/`. +- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from skip-thought text embeddings. Download and save it to `models/` (Just use the same setting as the char-CNN-RNN, we assume better results can be achieved by playing with the hyper-parameters). 
**char-CNN-RNN text encoders** - [Download](https://drive.google.com/file/d/0B0ywwgffWnLLZUt0UmQ1LU1oWlU/view) the char-CNN-RNN text encoder for flowers to `models/text_encoder/`. -- [Download](https://drive.google.com/file/d/0B0ywwgffWnLLU0F3UHA3NzFTNEE/view) the char-CNN-RNN text encoder for birds to `models/text_encoder/`. +- [Download](https://drive.google.com/open?id=1a11TUAQKrHxRWnzWBTLpK9FkZdZqhKlT) the char-CNN-RNN text encoder for birds to `models/text_encoder/`. diff --git a/stageI/cfg/birds.yml b/stageI/cfg/birds.yml index af02386..7537341 100644 --- a/stageI/cfg/birds.yml +++ b/stageI/cfg/birds.yml @@ -6,8 +6,8 @@ GPU_ID: 0 Z_DIM: 100 TRAIN: - FLAG: True - PRETRAINED_MODEL: '' + FLAG: False # True + PRETRAINED_MODEL: './ckt_logs/birds/stageI_2019_07_10_09_33_08/model_82000.ckpt' # '' BATCH_SIZE: 64 NUM_COPY: 4 MAX_EPOCH: 600 diff --git a/stageI/model.py b/stageI/model.py index 15d8d9c..20e2652 100644 --- a/stageI/model.py +++ b/stageI/model.py @@ -1,11 +1,13 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt import tensorflow as tf -import misc.custom_ops -from misc.custom_ops import leaky_rectify -from misc.config import cfg +import sys + +sys.path.append('misc') + +from custom_ops import fc, conv_batch_normalization, fc_batch_normalization, reshape, Conv2d, Deconv2d, UpSample, add +from config import cfg class CondGAN(object): @@ -17,208 +19,171 @@ def __init__(self, image_shape): self.df_dim = cfg.GAN.DF_DIM self.ef_dim = cfg.GAN.EMBEDDING_DIM - self.image_shape = image_shape self.s = image_shape[0] - self.s2, self.s4, self.s8, self.s16 =\ - int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) - - # Since D is only used during training, we build a template - # for safe reuse the variables during computing loss for fake/real/wrong images - # We do not do this for G, - # because batch_norm needs different options for training and testing - if cfg.GAN.NETWORK_TYPE == "default": - with tf.variable_scope("d_net"): - self.d_encode_img_template = self.d_encode_image() - self.d_context_template = self.context_embedding() - self.discriminator_template = self.discriminator() - elif cfg.GAN.NETWORK_TYPE == "simple": - with tf.variable_scope("d_net"): - self.d_encode_img_template = self.d_encode_image_simple() - self.d_context_template = self.context_embedding() - self.discriminator_template = self.discriminator() - else: - raise NotImplementedError + self.s2, self.s4, self.s8, self.s16 = int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) # g-net def generate_condition(self, c_var): - conditions =\ - (pt.wrap(c_var). - flatten(). - custom_fully_connected(self.ef_dim * 2). - apply(leaky_rectify, leakiness=0.2)) + conditions = fc(c_var, self.ef_dim * 2, 'gen_cond/fc', activation_fn=tf.nn.leaky_relu) mean = conditions[:, :self.ef_dim] log_sigma = conditions[:, self.ef_dim:] return [mean, log_sigma] - def generator(self, z_var): - node1_0 =\ - (pt.wrap(z_var). - flatten(). - custom_fully_connected(self.s16 * self.s16 * self.gf_dim * 8). - fc_batch_norm(). - reshape([-1, self.s16, self.s16, self.gf_dim * 8])) - node1_1 = \ - (node1_0. - custom_conv2d(self.gf_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node1 = \ - (node1_0. - apply(tf.add, node1_1). 
-             apply(tf.nn.relu))
-
-        node2_0 = \
-            (node1.
-             # custom_deconv2d([0, self.s8, self.s8, self.gf_dim * 4], k_h=4, k_w=4).
-             apply(tf.image.resize_nearest_neighbor, [self.s8, self.s8]).
-             custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm())
-        node2_1 = \
-            (node2_0.
-             custom_conv2d(self.gf_dim * 1, k_h=1, k_w=1, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             custom_conv2d(self.gf_dim * 1, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm())
-        node2 = \
-            (node2_0.
-             apply(tf.add, node2_1).
-             apply(tf.nn.relu))
-
-        output_tensor = \
-            (node2.
-             # custom_deconv2d([0, self.s4, self.s4, self.gf_dim * 2], k_h=4, k_w=4).
-             apply(tf.image.resize_nearest_neighbor, [self.s4, self.s4]).
-             custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             # custom_deconv2d([0, self.s2, self.s2, self.gf_dim], k_h=4, k_w=4).
-             apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]).
-             custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             # custom_deconv2d([0] + list(self.image_shape), k_h=4, k_w=4).
-             apply(tf.image.resize_nearest_neighbor, [self.s, self.s]).
-             custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1).
-             apply(tf.nn.tanh))
+    def generator(self, z_var, training=True):
+        node1_0 = fc(z_var, self.s16 * self.s16 * self.gf_dim * 8, 'g_n1.0/fc')
+        node1_0 = fc_batch_normalization(node1_0, 'g_n1.0/batch_norm')
+        node1_0 = reshape(node1_0, [-1, self.s16, self.s16, self.gf_dim * 8], name='g_n1.0/reshape')
+
+        node1_1 = Conv2d(node1_0, 1, 1, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_1', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d2')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 8, 1, 1, name='g_n1.1/conv2d3')
+        # no activation before the residual add (the pt chain above ends with a
+        # plain conv_batch_norm(), as does node2_1 below)
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_3', is_training=training)
+
+        node1 = add([node1_0, node1_1], name='g_n1_res/add')
+        node1_output = tf.nn.relu(node1)
+
+        node2_0 = UpSample(node1_output, size=[self.s8, self.s8], method=1, align_corners=False, name='g_n2.0/upsample')
+        node2_0 = Conv2d(node2_0, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.0/conv2d')
+        node2_0 = conv_batch_normalization(node2_0, 'g_n2.0/batch_norm', is_training=training)
+
+        node2_1 = Conv2d(node2_0, 1, 1, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm', activation_fn=tf.nn.relu, is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d2')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.1/conv2d3')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm3', is_training=training)
+
+        node2 = add([node2_0, node2_1], name='g_n2_res/add')
+        node2_output = tf.nn.relu(node2)
+
+        output_tensor = UpSample(node2_output, size=[self.s4, self.s4], method=1, align_corners=False,
+                                 name='g_OT/upsample')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim * 2, 1, 1, name='g_OT/conv2d')
+        output_tensor = conv_batch_normalization(output_tensor,
'g_OT/batch_norm', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = UpSample(output_tensor, size=[self.s2, self.s2], method=1, align_corners=False, + name='g_OT/upsample2') + output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim, 1, 1, name='g_OT/conv2d2') + output_tensor = conv_batch_normalization(output_tensor, 'g_OT/batch_norm2', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = UpSample(output_tensor, size=[self.s, self.s], method=1, align_corners=False, + name='g_OT/upsample3') + output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, activation_fn=tf.nn.tanh, name='g_OT/conv2d3') return output_tensor - def generator_simple(self, z_var): - output_tensor =\ - (pt.wrap(z_var). - flatten(). - custom_fully_connected(self.s16 * self.s16 * self.gf_dim * 8). - reshape([-1, self.s16, self.s16, self.gf_dim * 8]). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0, self.s8, self.s8, self.gf_dim * 4], k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s8, self.s8]). - # custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0, self.s4, self.s4, self.gf_dim * 2], k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s4, self.s4]). - # custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0, self.s2, self.s2, self.gf_dim], k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]). - # custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0] + list(self.image_shape), k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s, self.s]). - # custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1). 
- apply(tf.nn.tanh)) + def generator_simple(self, z_var, training=True): + output_tensor = fc(z_var, self.s16 * self.s16 * self.gf_dim * 8, 'g_simple_OT/fc') + output_tensor = reshape(output_tensor, [-1, self.s16, self.s16, self.gf_dim * 8], name='g_simple_OT/reshape') + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0, self.s8, self.s8, self.gf_dim * 4], name='g_simple_OT/deconv2d', + k_h=4, k_w=4) + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm2', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0, self.s4, self.s4, self.gf_dim * 2], name='g_simple_OT/deconv2d2', + k_h=4, k_w=4) + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm3', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0, self.s2, self.s2, self.gf_dim], name='g_simple_OT/deconv2d3', + k_h=4, k_w=4) + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm4', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0] + list(self.image_shape), name='g_simple_OT/deconv2d4', + k_h=4, k_w=4, activation_fn=tf.nn.tanh) + return output_tensor - def get_generator(self, z_var): + + def get_generator(self, z_var, is_training): if cfg.GAN.NETWORK_TYPE == "default": - return self.generator(z_var) + return self.generator(z_var, training=is_training) elif cfg.GAN.NETWORK_TYPE == "simple": - return self.generator_simple(z_var) + return self.generator_simple(z_var, training=is_training) else: raise NotImplementedError # d-net - def context_embedding(self): - template = (pt.template("input"). - custom_fully_connected(self.ef_dim). - apply(leaky_rectify, leakiness=0.2)) + def context_embedding(self, inputs=None, if_reuse=None): + template = fc(inputs, self.ef_dim, 'd_embedd/fc', activation_fn=tf.nn.leaky_relu, reuse=if_reuse) return template - def d_encode_image(self): - node1_0 = \ - (pt.template("input"). - custom_conv2d(self.df_dim, k_h=4, k_w=4). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 4, k_h=4, k_w=4). - conv_batch_norm(). - custom_conv2d(self.df_dim * 8, k_h=4, k_w=4). - conv_batch_norm()) - node1_1 = \ - (node1_0. - custom_conv2d(self.df_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - - node1 = \ - (node1_0. - apply(tf.add, node1_1). 
- apply(leaky_rectify, leakiness=0.2)) + def d_encode_image(self, training=True, inputs=None, if_reuse=None): + node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, name='d_n1.0/conv2d', activation_fn=tf.nn.leaky_relu, + reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='d_n1.0/conv2d2', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='d_n1.0/conv2d3', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm2', is_training=training, reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='d_n1.0/conv2d4', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm3', is_training=training, reuse=if_reuse) + + node1_1 = Conv2d(node1_0, 1, 1, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d2', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 8, 1, 1, name='d_n1.1/conv2d3', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm3', is_training=training, reuse=if_reuse) + + node1 = add([node1_0, node1_1], name='d_n1_res/add') + node1 = tf.nn.leaky_relu(node1) return node1 - def d_encode_image_simple(self): - template = \ - (pt.template("input"). - custom_conv2d(self.df_dim, k_h=4, k_w=4). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 4, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2)) + def d_encode_image_simple(self, training=True, inputs=None, if_reuse=None): + template = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu, name='d_template/conv2d', + reuse=if_reuse) + template = Conv2d(template, 4, 4, self.df_dim * 2, 2, 2, name='d_template/conv2d2', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + template = Conv2d(template, 4, 4, self.df_dim * 4, 2, 2, name='d_template/conv2d3', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + template = Conv2d(template, 4, 4, self.df_dim * 8, 2, 2, name='d_template/conv2d4', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm3', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) return template - def discriminator(self): - template = \ - (pt.template("input"). # 128*9*4*4 - custom_conv2d(self.df_dim * 8, k_h=1, k_w=1, d_h=1, d_w=1). # 128*8*4*4 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). 
-             # custom_fully_connected(1))
-             custom_conv2d(1, k_h=self.s16, k_w=self.s16, d_h=self.s16, d_w=self.s16))
+    def discriminator(self, training=True, inputs=None, if_reuse=None):
+        template = Conv2d(inputs, 1, 1, self.df_dim * 8, 1, 1, name='d_template/conv2d', reuse=if_reuse)
+        template = conv_batch_normalization(template, 'd_template/batch_norm', is_training=training,
+                                            activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        template = Conv2d(template, self.s16, self.s16, 1, self.s16, self.s16, name='d_template/conv2d2',
+                          reuse=if_reuse)
         return template
 
-    def get_discriminator(self, x_var, c_var):
-        x_code = self.d_encode_img_template.construct(input=x_var)
-        c_code = self.d_context_template.construct(input=c_var)
-        c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
-        c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])
+    # D is used only during training; its variables must be shared across the
+    # calls for the real/wrong/fake images, which the reuse flags below handle
+    # (pass no_reuse=tf.AUTO_REUSE on the second and later calls).
+    # We do not share G this way, because batch_norm needs different options
+    # for training and testing.
+    def get_discriminator(self, x_var, c_var, is_training, no_reuse=None):
+        if cfg.GAN.NETWORK_TYPE == "default":
+            x_code = self.d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse)
+            c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse)
+            c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
+            c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])
+            x_c_code = tf.concat([x_code, c_code], 3)
+
+            return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse)
+
+        elif cfg.GAN.NETWORK_TYPE == "simple":
+            x_code = self.d_encode_image_simple(training=is_training, inputs=x_var, if_reuse=no_reuse)
+            c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse)
+            c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
+            c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])
+            x_c_code = tf.concat([x_code, c_code], 3)
 
-        x_c_code = tf.concat(3, [x_code, c_code])
-        return self.discriminator_template.construct(input=x_c_code)
+            return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse)
+        else:
+            raise NotImplementedError
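With the prettytensor templates gone, the three discriminator evaluations share weights through TensorFlow variable scopes instead. A minimal sketch of the pattern, under the assumption that the `Conv2d`/`fc` wrappers in `custom_ops` forward their `reuse` argument to `tf.variable_scope` (names below are illustrative):

```python
import tensorflow as tf

def disc_layer(x, reuse=None):
    # First call (reuse=None) creates d_layer/w; subsequent calls with
    # reuse=tf.AUTO_REUSE bind to the existing variable instead of
    # creating a duplicate.
    with tf.variable_scope('d_layer', reuse=reuse):
        w = tf.get_variable('w', shape=[64, 1])
        return tf.matmul(x, w)

feats = tf.placeholder(tf.float32, [None, 64])
real_logit = disc_layer(feats)                       # creates the variables
fake_logit = disc_layer(feats, reuse=tf.AUTO_REUSE)  # reuses the same variables
```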
diff --git a/stageI/run_exp.py b/stageI/run_exp.py
index 8535fea..53d0b8e 100644
--- a/stageI/run_exp.py
+++ b/stageI/run_exp.py
@@ -1,27 +1,26 @@
 from __future__ import division
 from __future__ import print_function
 
-import dateutil
 import dateutil.tz
 import datetime
 import argparse
 import pprint
 
-from misc.datasets import TextDataset
-from stageI.model import CondGAN
-from stageI.trainer import CondGANTrainer
-from misc.utils import mkdir_p
-from misc.config import cfg, cfg_from_file
+import sys
+sys.path.append('misc')
+sys.path.append('stageI')
+
+from datasets import TextDataset
+from utils import mkdir_p
+from config import cfg, cfg_from_file
+from model import CondGAN
+from trainer import CondGANTrainer
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Train a GAN network')
-    parser.add_argument('--cfg', dest='cfg_file',
-                        help='optional config file',
-                        default=None, type=str)
-    parser.add_argument('--gpu', dest='gpu_id',
-                        help='GPU device id to use [0]',
-                        default=-1, type=int)
+    parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str)
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int)
     # if len(sys.argv) == 1:
     #     parser.print_help()
     #     sys.exit(1)
@@ -48,22 +47,16 @@ def parse_args():
     filename_train = '%s/train' % (datadir)
     dataset.train = dataset.get_data(filename_train)
-    ckt_logs_dir = "ckt_logs/%s/%s_%s" % \
-        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
+    ckt_logs_dir = "ckt_logs/%s/%s_%s" % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
     mkdir_p(ckt_logs_dir)
 else:
     s_tmp = cfg.TRAIN.PRETRAINED_MODEL
     ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]
 
-    model = CondGAN(
-        image_shape=dataset.image_shape
-    )
+    model = CondGAN(image_shape=dataset.image_shape)
+
+    algo = CondGANTrainer(model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir)
 
-    algo = CondGANTrainer(
-        model=model,
-        dataset=dataset,
-        ckt_logs_dir=ckt_logs_dir
-    )
     if cfg.TRAIN.FLAG:
         algo.train()
     else:
diff --git a/stageI/trainer.py b/stageI/trainer.py
index 001666a..981bc2e 100644
--- a/stageI/trainer.py
+++ b/stageI/trainer.py
@@ -1,18 +1,18 @@
 from __future__ import division
 from __future__ import print_function
+from six.moves import range
+from progressbar import ETA, Bar, Percentage, ProgressBar
 
-import prettytensor as pt
 import tensorflow as tf
 import numpy as np
-import scipy.misc
 import os
-import sys
-from six.moves import range
-from progressbar import ETA, Bar, Percentage, ProgressBar
+import imageio
+import sys
+sys.path.append('misc')
 
-from misc.config import cfg
-from misc.utils import mkdir_p
+from config import cfg
+from utils import mkdir_p
 
 TINY = 1e-8
 
@@ -26,12 +26,7 @@ def KL_loss(mu, log_sigma):
 
 class CondGANTrainer(object):
-    def __init__(self,
-                 model,
-                 dataset=None,
-                 exp_name="model",
-                 ckt_logs_dir="ckt_logs",
-                 ):
+    def __init__(self, model, dataset=None, exp_name="model", ckt_logs_dir="ckt_logs"):
         """
         :type model: CondGAN
         """
@@ -48,28 +43,18 @@ def __init__(self,
         self.log_vars = []
 
+        tf.reset_default_graph()
+
     def build_placeholder(self):
-        '''Helper function for init_opt'''
-        self.images = tf.placeholder(
-            tf.float32, [self.batch_size] + self.dataset.image_shape,
-            name='real_images')
-        self.wrong_images = tf.placeholder(
-            tf.float32, [self.batch_size] + self.dataset.image_shape,
-            name='wrong_images'
-        )
-        self.embeddings = tf.placeholder(
-            tf.float32, [self.batch_size] + self.dataset.embedding_shape,
-            name='conditional_embeddings'
-        )
-
-        self.generator_lr = tf.placeholder(
-            tf.float32, [],
-            name='generator_learning_rate'
-        )
-        self.discriminator_lr = tf.placeholder(
-            tf.float32, [],
-            name='discriminator_learning_rate'
-        )
+        ''' Helper function for init_opt '''
+        self.images = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.image_shape, name='real_images')
+        self.wrong_images = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.image_shape,
+                                           name='wrong_images')
+        self.embeddings = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.embedding_shape,
+                                         name='conditional_embeddings')
+
+        self.generator_lr = tf.placeholder(tf.float32, [], name='generator_learning_rate')
+        self.discriminator_lr = tf.placeholder(tf.float32, [], name='discriminator_learning_rate')
 
     def sample_encoded_context(self, embeddings):
         '''Helper function for init_opt'''
@@ -91,21 +76,19 @@ def sample_encoded_context(self, embeddings):
 
     def init_opt(self):
         self.build_placeholder()
 
-        with pt.defaults_scope(phase=pt.Phase.train):
-            with tf.variable_scope("g_net"):
-                # ####get output from G network################################
-                c, kl_loss = self.sample_encoded_context(self.embeddings)
-                z = tf.random_normal([self.batch_size, cfg.Z_DIM])
-                self.log_vars.append(("hist_c", c))
-                self.log_vars.append(("hist_z", z))
-                fake_images = self.model.get_generator(tf.concat(1, [c, z]))
+        with tf.variable_scope("g_net"):  # For 
training + # ####get output from G network################################ + c, kl_loss = self.sample_encoded_context(self.embeddings) + z = tf.random_normal([self.batch_size, cfg.Z_DIM]) + self.log_vars.append(("hist_c", c)) + self.log_vars.append(("hist_z", z)) + fake_images = self.model.get_generator(tf.concat([c, z], 1), True) # set training to be True + + with tf.variable_scope("d_net"): # For training # ####get discriminator_loss and generator_loss ################### - discriminator_loss, generator_loss =\ - self.compute_losses(self.images, - self.wrong_images, - fake_images, - self.embeddings) + discriminator_loss, generator_loss = self.compute_losses(self.images, self.wrong_images, fake_images, + self.embeddings) generator_loss += kl_loss self.log_vars.append(("g_loss_kl_loss", kl_loss)) self.log_vars.append(("g_loss", generator_loss)) @@ -116,11 +99,11 @@ def init_opt(self): # #######define self.g_sum, self.d_sum,....######################## self.define_summaries() - with pt.defaults_scope(phase=pt.Phase.test): - with tf.variable_scope("g_net", reuse=True): - self.sampler() - self.visualization(cfg.TRAIN.NUM_COPY) - print("success") + with tf.variable_scope("g_net", reuse=True): # For testing + self.sampler() + self.visualization(cfg.TRAIN.NUM_COPY) + print("success") + def sampler(self): c, _ = self.sample_encoded_context(self.embeddings) @@ -128,62 +111,52 @@ def sampler(self): z = tf.zeros([self.batch_size, cfg.Z_DIM]) # Expect similar BGs else: z = tf.random_normal([self.batch_size, cfg.Z_DIM]) - self.fake_images = self.model.get_generator(tf.concat(1, [c, z])) + self.fake_images = self.model.get_generator(tf.concat([c, z], 1), False) # for testing def compute_losses(self, images, wrong_images, fake_images, embeddings): - real_logit = self.model.get_discriminator(images, embeddings) - wrong_logit = self.model.get_discriminator(wrong_images, embeddings) - fake_logit = self.model.get_discriminator(fake_images, embeddings) + real_logit = self.model.get_discriminator(images, embeddings, True) + # Reuse the weights + wrong_logit = self.model.get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE) + fake_logit = self.model.get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE) - real_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(real_logit, - tf.ones_like(real_logit)) + real_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=real_logit, labels=tf.ones_like(real_logit)) real_d_loss = tf.reduce_mean(real_d_loss) - wrong_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(wrong_logit, - tf.zeros_like(wrong_logit)) + wrong_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=wrong_logit, labels=tf.zeros_like(wrong_logit)) wrong_d_loss = tf.reduce_mean(wrong_d_loss) - fake_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(fake_logit, - tf.zeros_like(fake_logit)) + fake_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.zeros_like(fake_logit)) fake_d_loss = tf.reduce_mean(fake_d_loss) if cfg.TRAIN.B_WRONG: - discriminator_loss =\ - real_d_loss + (wrong_d_loss + fake_d_loss) / 2. + discriminator_loss = real_d_loss + (wrong_d_loss + fake_d_loss) / 2. 
self.log_vars.append(("d_loss_wrong", wrong_d_loss)) else: discriminator_loss = real_d_loss + fake_d_loss self.log_vars.append(("d_loss_real", real_d_loss)) self.log_vars.append(("d_loss_fake", fake_d_loss)) - generator_loss = \ - tf.nn.sigmoid_cross_entropy_with_logits(fake_logit, - tf.ones_like(fake_logit)) + generator_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.ones_like(fake_logit)) generator_loss = tf.reduce_mean(generator_loss) return discriminator_loss, generator_loss def prepare_trainer(self, generator_loss, discriminator_loss): - '''Helper function for init_opt''' + ''' Helper function for init_opt ''' all_vars = tf.trainable_variables() - g_vars = [var for var in all_vars if - var.name.startswith('g_')] - d_vars = [var for var in all_vars if - var.name.startswith('d_')] - - generator_opt = tf.train.AdamOptimizer(self.generator_lr, - beta1=0.5) - self.generator_trainer =\ - pt.apply_optimizer(generator_opt, - losses=[generator_loss], - var_list=g_vars) - discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, - beta1=0.5) - self.discriminator_trainer =\ - pt.apply_optimizer(discriminator_opt, - losses=[discriminator_loss], - var_list=d_vars) + g_vars = [var for var in all_vars if var.name.startswith('g_')] + d_vars = [var for var in all_vars if var.name.startswith('d_')] + + # Update the trainable variables + update_ops_D = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith('d_')] + update_ops_G = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith('g_')] + + with tf.control_dependencies(update_ops_G): # Update the moving mean and variance from the batch normalization + generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5) + self.generator_trainer = generator_opt.minimize(generator_loss, var_list=g_vars) + + with tf.control_dependencies(update_ops_D): # Update the moving mean and variance from the batch normalization + discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5) + self.discriminator_trainer = discriminator_opt.minimize(discriminator_loss, var_list=d_vars) + self.log_vars.append(("g_learning_rate", self.generator_lr)) self.log_vars.append(("d_learning_rate", self.discriminator_lr)) @@ -192,15 +165,15 @@ def define_summaries(self): all_sum = {'g': [], 'd': [], 'hist': []} for k, v in self.log_vars: if k.startswith('g'): - all_sum['g'].append(tf.scalar_summary(k, v)) + all_sum['g'].append(tf.summary.scalar(k, v)) elif k.startswith('d'): - all_sum['d'].append(tf.scalar_summary(k, v)) + all_sum['d'].append(tf.summary.scalar(k, v)) elif k.startswith('hist'): - all_sum['hist'].append(tf.histogram_summary(k, v)) + all_sum['hist'].append(tf.summary.histogram(k, v)) - self.g_sum = tf.merge_summary(all_sum['g']) - self.d_sum = tf.merge_summary(all_sum['d']) - self.hist_sum = tf.merge_summary(all_sum['hist']) + self.g_sum = tf.summary.merge(all_sum['g']) + self.d_sum = tf.summary.merge(all_sum['d']) + self.hist_sum = tf.summary.merge(all_sum['hist']) def visualize_one_superimage(self, img_var, images, rows, filename): stacked_img = [] @@ -210,22 +183,18 @@ def visualize_one_superimage(self, img_var, images, rows, filename): for col in range(rows): row_img.append(img_var[row * rows + col, :, :, :]) # each rows is 1realimage +10_fakeimage - stacked_img.append(tf.concat(1, row_img)) - imgs = tf.expand_dims(tf.concat(0, stacked_img), 0) - current_img_summary = tf.image_summary(filename, imgs) + stacked_img.append(tf.concat(row_img, 1)) + imgs = 
tf.expand_dims(tf.concat(stacked_img, 0), 0) + current_img_summary = tf.summary.image(filename, imgs) return current_img_summary, imgs def visualization(self, n): - fake_sum_train, superimage_train = \ - self.visualize_one_superimage(self.fake_images[:n * n], - self.images[:n * n], - n, "train") - fake_sum_test, superimage_test = \ - self.visualize_one_superimage(self.fake_images[n * n:2 * n * n], - self.images[n * n:2 * n * n], - n, "test") - self.superimages = tf.concat(0, [superimage_train, superimage_test]) - self.image_summary = tf.merge_summary([fake_sum_train, fake_sum_test]) + fake_sum_train, superimage_train = self.visualize_one_superimage(self.fake_images[:n * n], self.images[:n * n], + n, "train") + fake_sum_test, superimage_test = self.visualize_one_superimage(self.fake_images[n * n:2 * n * n], + self.images[n * n:2 * n * n], n, "test") + self.superimages = tf.concat([superimage_train, superimage_test], 0) + self.image_summary = tf.summary.merge([fake_sum_train, fake_sum_test]) def preprocess(self, x, n): # make sure every row with n column have the same embeddings @@ -235,33 +204,29 @@ def preprocess(self, x, n): return x def epoch_sum_images(self, sess, n): - images_train, _, embeddings_train, captions_train, _ =\ - self.dataset.train.next_batch(n * n, cfg.TRAIN.NUM_EMBEDDING) + images_train, _, embeddings_train, captions_train, _ = self.dataset.train.next_batch(n * n, + cfg.TRAIN.NUM_EMBEDDING) images_train = self.preprocess(images_train, n) embeddings_train = self.preprocess(embeddings_train, n) - images_test, _, embeddings_test, captions_test, _ = \ - self.dataset.test.next_batch(n * n, 1) + images_test, _, embeddings_test, captions_test, _ = self.dataset.test.next_batch(n * n, 1) images_test = self.preprocess(images_test, n) embeddings_test = self.preprocess(embeddings_test, n) images = np.concatenate([images_train, images_test], axis=0) - embeddings =\ - np.concatenate([embeddings_train, embeddings_test], axis=0) + embeddings = np.concatenate([embeddings_train, embeddings_test], axis=0) if self.batch_size > 2 * n * n: - images_pad, _, embeddings_pad, _, _ =\ - self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1) + images_pad, _, embeddings_pad, _, _ = self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1) images = np.concatenate([images, images_pad], axis=0) embeddings = np.concatenate([embeddings, embeddings_pad], axis=0) feed_dict = {self.images: images, self.embeddings: embeddings} - gen_samples, img_summary =\ - sess.run([self.superimages, self.image_summary], feed_dict) + gen_samples, img_summary = sess.run([self.superimages, self.image_summary], feed_dict) # save images generated for train and test captions - scipy.misc.imsave('%s/train.jpg' % (self.log_dir), gen_samples[0]) - scipy.misc.imsave('%s/test.jpg' % (self.log_dir), gen_samples[1]) + imageio.imwrite('%s/train.jpg' % (self.log_dir), gen_samples[0]) + imageio.imwrite('%s/test.jpg' % (self.log_dir), gen_samples[1]) # pfi_train = open(self.log_dir + "/train.txt", "w") pfi_test = open(self.log_dir + "/test.txt", "w") @@ -278,11 +243,11 @@ def epoch_sum_images(self, sess, n): def build_model(self, sess): self.init_opt() - sess.run(tf.initialize_all_variables()) + sess.run(tf.global_variables_initializer()) if len(self.model_path) > 0: print("Reading model parameters from %s" % self.model_path) - restore_vars = tf.all_variables() + restore_vars = tf.global_variables() # all_vars = tf.all_variables() # restore_vars = [var for var in all_vars if # var.name.startswith('g_') or @@ -301,15 +266,14 @@ 
def build_model(self, sess): def train(self): config = tf.ConfigProto(allow_soft_placement=True) + config.gpu_options.per_process_gpu_memory_fraction = 0.7 with tf.Session(config=config) as sess: with tf.device("/gpu:%d" % cfg.GPU_ID): counter = self.build_model(sess) - saver = tf.train.Saver(tf.all_variables(), - keep_checkpoint_every_n_hours=2) + saver = tf.train.Saver(tf.global_variables(), keep_checkpoint_every_n_hours=2) # summary_op = tf.merge_all_summaries() - summary_writer = tf.train.SummaryWriter(self.log_dir, - sess.graph) + summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) keys = ["d_loss", "g_loss"] log_vars = [] @@ -327,10 +291,8 @@ def train(self): updates_per_epoch = int(number_example / self.batch_size) epoch_start = int(counter / updates_per_epoch) for epoch in range(epoch_start, self.max_epoch): - widgets = ["epoch #%d|" % epoch, - Percentage(), Bar(), ETA()] - pbar = ProgressBar(maxval=updates_per_epoch, - widgets=widgets) + widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()] + pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets) pbar.start() if epoch % lr_decay_step == 0 and epoch != 0: @@ -341,8 +303,7 @@ def train(self): for i in range(updates_per_epoch): pbar.update(i) # training d - images, wrong_images, embeddings, _, _ =\ - self.dataset.train.next_batch(self.batch_size, + images, wrong_images, embeddings, _, _ = self.dataset.train.next_batch(self.batch_size, num_embedding) feed_dict = {self.images: images, self.wrong_images: wrong_images, @@ -351,28 +312,19 @@ def train(self): self.discriminator_lr: discriminator_lr } # train d - feed_out = [self.discriminator_trainer, - self.d_sum, - self.hist_sum, - log_vars] - _, d_sum, hist_sum, log_vals = sess.run(feed_out, - feed_dict) + feed_out = [self.discriminator_trainer, self.d_sum, self.hist_sum, log_vars] + _, d_sum, hist_sum, log_vals = sess.run(feed_out, feed_dict) summary_writer.add_summary(d_sum, counter) summary_writer.add_summary(hist_sum, counter) all_log_vals.append(log_vals) # train g - feed_out = [self.generator_trainer, - self.g_sum] - _, g_sum = sess.run(feed_out, - feed_dict) + feed_out = [self.generator_trainer, self.g_sum] + _, g_sum = sess.run(feed_out, feed_dict) summary_writer.add_summary(g_sum, counter) # save checkpoint counter += 1 if counter % self.snapshot_interval == 0: - snapshot_path = "%s/%s_%s.ckpt" %\ - (self.checkpoint_dir, - self.exp_name, - str(counter)) + snapshot_path = "%s/%s_%s.ckpt" % (self.checkpoint_dir, self.exp_name, str(counter)) fn = saver.save(sess, snapshot_path) print("Model saved in file: %s" % fn) @@ -385,21 +337,17 @@ def train(self): dic_logs[k] = v # print(k, v) - log_line = "; ".join("%s: %s" % - (str(k), str(dic_logs[k])) - for k in dic_logs) + log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k])) for k in dic_logs) print("Epoch %d | " % (epoch) + log_line) sys.stdout.flush() if np.any(np.isnan(avg_log_vals)): raise ValueError("NaN detected!") - def save_super_images(self, images, sample_batchs, filenames, - sentenceID, save_dir, subset): + def save_super_images(self, images, sample_batchs, filenames, sentenceID, save_dir, subset): # batch_size samples for each embedding numSamples = len(sample_batchs) for j in range(len(filenames)): - s_tmp = '%s-1real-%dsamples/%s/%s' %\ - (save_dir, numSamples, subset, filenames[j]) + s_tmp = '%s-1real-%dsamples/%s/%s' % (save_dir, numSamples, subset, filenames[j]) folder = s_tmp[:s_tmp.rfind('/')] if not os.path.isdir(folder): print('Make a new folder: ', folder) @@ -411,42 +359,40 @@ def 
save_super_images(self, images, sample_batchs, filenames,
             superimage = np.concatenate(superimage, axis=1)
             fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
-            scipy.misc.imsave(fullpath, superimage)
+            imageio.imwrite(fullpath, superimage)
 
     def eval_one_dataset(self, sess, dataset, save_dir, subset='train'):
         count = 0
         print('num_examples:', dataset._num_examples)
         while count < dataset._num_examples:
             start = count % dataset._num_examples
-            images, embeddings_batchs, filenames, _ =\
-                dataset.next_batch_test(self.batch_size, start, 1)
+            images, embeddings_batchs, filenames, _ = dataset.next_batch_test(self.batch_size, start, 1)
             print('count = ', count, 'start = ', start)
             for i in range(len(embeddings_batchs)):
                 samples_batchs = []
                 # Generate up to 16 images for each sentence,
                 # with randomness from noise z and conditioning augmentation.
                 for j in range(np.minimum(16, cfg.TRAIN.NUM_COPY)):
-                    samples = sess.run(self.fake_images,
-                                       {self.embeddings: embeddings_batchs[i]})
+                    samples = sess.run(self.fake_images, {self.embeddings: embeddings_batchs[i]})
                     samples_batchs.append(samples)
-                self.save_super_images(images, samples_batchs,
-                                       filenames, i, save_dir,
-                                       subset)
+                self.save_super_images(images, samples_batchs, filenames, i, save_dir, subset)
             count += self.batch_size
 
     def evaluate(self):
         config = tf.ConfigProto(allow_soft_placement=True)
+        config.gpu_options.per_process_gpu_memory_fraction = 0.7
         with tf.Session(config=config) as sess:
             with tf.device("/gpu:%d" % cfg.GPU_ID):
                 if self.model_path.find('.ckpt') != -1:
                     self.init_opt()
                     print("Reading model parameters from %s" % self.model_path)
-                    saver = tf.train.Saver(tf.all_variables())
+                    saver = tf.train.Saver(tf.global_variables())
+                    # print(tf.global_variables())  # debug: list the variables being restored
                     saver.restore(sess, self.model_path)
-                    # self.eval_one_dataset(sess, self.dataset.train,
-                    #                       self.log_dir, subset='train')
-                    self.eval_one_dataset(sess, self.dataset.test,
-                                          self.log_dir, subset='test')
+
+                    # self.eval_one_dataset(sess, self.dataset.train, self.log_dir, subset='train')
+
+                    self.eval_one_dataset(sess, self.dataset.test, self.log_dir, subset='test')
                 else:
                     print("Input a valid model path.")
diff --git a/stageII/__init__.py b/stageII/__init__.py
index f78a8b1..008827b 100644
--- a/stageII/__init__.py
+++ b/stageII/__init__.py
@@ -1,2 +1,3 @@
 from __future__ import division
 from __future__ import print_function
+
diff --git a/stageII/cfg/birds.yml b/stageII/cfg/birds.yml
index 4e3ce8d..e0c805f 100644
--- a/stageII/cfg/birds.yml
+++ b/stageII/cfg/birds.yml
@@ -7,9 +7,9 @@ Z_DIM: 100
 
 TRAIN:
     FLAG: True
-    PRETRAINED_MODEL: './ckt_logs/birds/stageI/model_82000.ckpt'
+    PRETRAINED_MODEL: './models/stageI/model_82000.ckpt'
     PRETRAINED_EPOCH: 600
-    BATCH_SIZE: 64
+    BATCH_SIZE: 64  # reduce to 32 if you run out of GPU memory
     NUM_COPY: 4
     MAX_EPOCH: 1200
     SNAPSHOT_INTERVAL: 2000
@@ -19,6 +19,7 @@ TRAIN:
     NUM_EMBEDDING: 4
     COEFF:
         KL: 2.0
+    FINETUNE_LR: True
 
 GAN:
     EMBEDDING_DIM: 128
diff --git a/stageII/model.py b/stageII/model.py
index 28aee30..a4bddfa 100644
--- a/stageII/model.py
+++ b/stageII/model.py
@@ -1,14 +1,12 @@
 from __future__ import division
 from __future__ import print_function
 
-import prettytensor as pt
 import tensorflow as tf
-import misc.custom_ops
-from misc.custom_ops import leaky_rectify
-from misc.config import cfg
+import sys
+sys.path.append('misc')
 
-# TODO: Does template.constrct() really shared the computation
-# when multipel times of construct are done
+from custom_ops import fc, conv_batch_normalization, fc_batch_normalization, reshape, Conv2d, UpSample, add
+from config 
import cfg class CondGAN(object): @@ -22,300 +20,279 @@ def __init__(self, lr_imsize, hr_lr_ratio): self.s = lr_imsize print('lr_imsize: ', lr_imsize) - self.s2, self.s4, self.s8, self.s16 = \ - int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) - if cfg.GAN.NETWORK_TYPE == "default": - with tf.variable_scope("d_net"): - self.d_context_template = self.context_embedding() - self.d_image_template = self.d_encode_image() - self.d_discriminator_template = self.discriminator() - - with tf.variable_scope("hr_d_net"): - self.hr_d_context_template = self.context_embedding() - self.hr_d_image_template = self.hr_d_encode_image() - self.hr_discriminator_template = self.discriminator() - else: - raise NotImplementedError + self.s2, self.s4, self.s8, self.s16 = int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) # conditioning augmentation structure for text embedding # are shared by g and hr_g # g and hr_g build this structure separately and do not share parameters def generate_condition(self, c_var): - conditions =\ - (pt.wrap(c_var). - flatten(). - custom_fully_connected(self.ef_dim * 2). - apply(leaky_rectify, leakiness=0.2)) + conditions = fc(c_var, self.ef_dim * 2, 'gen_cond/fc', activation_fn=tf.nn.leaky_relu) mean = conditions[:, :self.ef_dim] log_sigma = conditions[:, self.ef_dim:] return [mean, log_sigma] # stage I generator (g) - def generator(self, z_var): - node1_0 =\ - (pt.wrap(z_var). - flatten(). - custom_fully_connected(self.s16 * self.s16 * self.gf_dim * 8). - fc_batch_norm(). - reshape([-1, self.s16, self.s16, self.gf_dim * 8])) - node1_1 = \ - (node1_0. - custom_conv2d(self.gf_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node1 = \ - (node1_0. - apply(tf.add, node1_1). - apply(tf.nn.relu)) - - node2_0 = \ - (node1. - # custom_deconv2d([0, self.s8, self.s8, self.gf_dim * 4], k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s8, self.s8]). - custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node2_1 = \ - (node2_0. - custom_conv2d(self.gf_dim * 1, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 1, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node2 = \ - (node2_0. - apply(tf.add, node2_1). - apply(tf.nn.relu)) - - output_tensor = \ - (node2. - # custom_deconv2d([0, self.s4, self.s4, self.gf_dim * 2], k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s4, self.s4]). - custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - # custom_deconv2d([0, self.s2, self.s2, self.gf_dim], k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]). - custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - # custom_deconv2d([0] + list(self.image_shape), k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s, self.s]). - custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1). 
-             apply(tf.nn.tanh))
+    def generator(self, z_var, training=True):
+        node1_0 = fc(z_var, self.s16 * self.s16 * self.gf_dim * 8, 'g_n1.0/fc')
+        node1_0 = fc_batch_normalization(node1_0, 'g_n1.0/batch_norm')
+        node1_0 = reshape(node1_0, [-1, self.s16, self.s16, self.gf_dim * 8], name='g_n1.0/reshape')
+
+        node1_1 = Conv2d(node1_0, 1, 1, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_1', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d2')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 8, 1, 1, name='g_n1.1/conv2d3')
+        # no activation before the residual add (the pt chain above ends with a
+        # plain conv_batch_norm(), as does node2_1 below)
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_3', is_training=training)
+
+        node1 = add([node1_0, node1_1], name='g_n1_res/add')
+        node1_output = tf.nn.relu(node1)
+
+        node2_0 = UpSample(node1_output, size=[self.s8, self.s8], method=1, align_corners=False, name='g_n2.0/upsample')
+        node2_0 = Conv2d(node2_0, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.0/conv2d')
+        node2_0 = conv_batch_normalization(node2_0, 'g_n2.0/batch_norm', is_training=training)
+
+        node2_1 = Conv2d(node2_0, 1, 1, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d2')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.1/conv2d3')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm3', is_training=training)
+
+        node2 = add([node2_0, node2_1], name='g_n2_res/add')
+        node2_output = tf.nn.relu(node2)
+
+        output_tensor = UpSample(node2_output, size=[self.s4, self.s4], method=1, align_corners=False,
+                                 name='g_OT/upsample')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim * 2, 1, 1, name='g_OT/conv2d')
+        output_tensor = conv_batch_normalization(output_tensor, 'g_OT/batch_norm', activation_fn=tf.nn.relu,
+                                                 is_training=training)
+        output_tensor = UpSample(output_tensor, size=[self.s2, self.s2], method=1, align_corners=False,
+                                 name='g_OT/upsample2')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim, 1, 1, name='g_OT/conv2d2')
+        output_tensor = conv_batch_normalization(output_tensor, 'g_OT/batch_norm2', activation_fn=tf.nn.relu,
+                                                 is_training=training)
+        output_tensor = UpSample(output_tensor, size=[self.s, self.s], method=1, align_corners=False,
+                                 name='g_OT/upsample3')
+        output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, activation_fn=tf.nn.tanh, name='g_OT/conv2d3')
+
+        return output_tensor
 
-    def get_generator(self, z_var):
+    def get_generator(self, z_var, is_training):
         if cfg.GAN.NETWORK_TYPE == "default":
-            return self.generator(z_var)
+            return self.generator(z_var, training=is_training)
         else:
             raise NotImplementedError
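`generate_condition` above only produces the Gaussian parameters of the conditioning augmentation; the sampling itself happens in the trainers' `sample_encoded_context`. A hedged sketch of the standard reparameterization step those parameters feed, with the usual KL regularizer against N(0, I) — the repo's `KL_loss` may differ in constants and reduction, and `model`/`embeddings` are assumed inputs:

```python
mean, log_sigma = model.generate_condition(embeddings)   # each of size ef_dim

# Reparameterization: sample c = mu + sigma * eps with eps ~ N(0, I),
# so gradients flow back through mu and log_sigma.
epsilon = tf.truncated_normal(tf.shape(mean))
c = mean + tf.exp(log_sigma) * epsilon

# KL(N(mu, sigma^2) || N(0, 1)), averaged over batch and dimensions.
kl_loss = -0.5 * tf.reduce_mean(1. + 2. * log_sigma
                                - tf.square(mean) - tf.exp(2. * log_sigma))
```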
 
     # stage II generator (hr_g)
-    def residual_block(self, x_c_code):
-        node0_0 = pt.wrap(x_c_code)  # -->s4 * s4 * gf_dim * 4
-        node0_1 = \
-            (pt.wrap(x_c_code).  # -->s4 * s4 * gf_dim * 4
-             custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm())
-        output_tensor = \
-            (node0_0.
-             apply(tf.add, node0_1).
-             apply(tf.nn.relu))
+    def residual_block(self, x_c_code, name, training=True):
+        node0_0 = x_c_code  # -->s4 * s4 * gf_dim * 4
+
+        node0_1 = Conv2d(x_c_code, 3, 3, self.gf_dim * 4, 1, 1, name=name+'/conv2d')
+        node0_1 = conv_batch_normalization(node0_1, name+'/batch_norm', is_training=training,
+                                           activation_fn=tf.nn.relu)
+        node0_1 = Conv2d(node0_1, 3, 3, self.gf_dim * 4, 1, 1, name=name+'/conv2d2')
+        node0_1 = conv_batch_normalization(node0_1, name+'/batch_norm2', is_training=training)
+
+        # use the per-block name so the four residual blocks get distinct op names
+        output_tensor = add([node0_0, node0_1], name=name+'/add')
+        output_tensor = tf.nn.relu(output_tensor)
+
         return output_tensor
 
-    def hr_g_encode_image(self, x_var):
-        output_tensor = \
-            (pt.wrap(x_var).  # -->s * s * 3
-             custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1).  # s * s * gf_dim
-             apply(tf.nn.relu).
-             custom_conv2d(self.gf_dim * 2, k_h=4, k_w=4).  # s2 * s2 * gf_dim * 2
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             custom_conv2d(self.gf_dim * 4, k_h=4, k_w=4).  # s4 * s4 * gf_dim * 4
-             conv_batch_norm().
-             apply(tf.nn.relu))
+    def hr_g_encode_image(self, x_var, training=True):  # input: x_var --> s * s * 3
+        # s * s * gf_dim
+        output_tensor = Conv2d(x_var, 3, 3, self.gf_dim, 1, 1, activation_fn=tf.nn.relu, name='hr_g_OT/conv2d')
+
+        # s2 * s2 * gf_dim * 2
+        output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 2, 2, 2, name='hr_g_OT/conv2d2')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_g_OT/batch_norm', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        # s4 * s4 * gf_dim * 4
+        output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 4, 2, 2, name='hr_g_OT/conv2d3')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_g_OT/batch_norm2', is_training=training,
+                                                 activation_fn=tf.nn.relu)
         return output_tensor
 
-    def hr_g_joint_img_text(self, x_c_code):
-        output_tensor = \
-            (pt.wrap(x_c_code).  # -->s4 * s4 * (ef_dim+gf_dim*4)
-             custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1).  # s4 * s4 * gf_dim * 4
-             conv_batch_norm().
-             apply(tf.nn.relu))
+    def hr_g_joint_img_text(self, x_c_code, training=True):  # input: x_c_code -->s4 * s4 * (ef_dim+gf_dim*4)
+        # s4 * s4 * gf_dim * 4
+        output_tensor = Conv2d(x_c_code, 3, 3, self.gf_dim * 4, 1, 1, name='hr_g_joint_OT/conv2d')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_g_joint_OT/batch_norm', is_training=training,
+                                                 activation_fn=tf.nn.relu)
         return output_tensor
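The encode/fuse/refine helpers above are chained by `hr_get_generator` further down; condensed, the stage-II wiring looks like this (a sketch using the methods defined in this file, where `model`, the stage-I output `lr_image`, and the sampled text code `c` are assumed inputs):

```python
x_code = model.hr_g_encode_image(lr_image, training=True)    # s4 x s4 x gf_dim*4
c_code = tf.expand_dims(tf.expand_dims(c, 1), 1)
c_code = tf.tile(c_code, [1, model.s4, model.s4, 1])         # broadcast text code spatially
x_c_code = tf.concat([x_code, c_code], 3)                    # fuse image and text features
node = model.hr_g_joint_img_text(x_c_code, training=True)
for i in range(4):                                           # four residual refinement blocks
    node = model.residual_block(node, 'node%d_resid_block' % (i + 1), training=True)
hr_image = model.hr_generator(node, training=True)           # upsample to 4s x 4s x 3
```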
-    def hr_generator(self, x_c_code):
-        output_tensor = \
-            (pt.wrap(x_c_code).  # -->s4 * s4 * gf_dim*4
-             # custom_deconv2d([0, self.s2, self.s2, self.gf_dim * 2], k_h=4, k_w=4).  # -->s2 * s2 * gf_dim*2
-             apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]).
-             custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             # custom_deconv2d([0, self.s, self.s, self.gf_dim], k_h=4, k_w=4).  # -->s * s * gf_dim
-             apply(tf.image.resize_nearest_neighbor, [self.s, self.s]).
-             custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             # custom_deconv2d([0, self.s * 2, self.s * 2, self.gf_dim // 2], k_h=4, k_w=4).  # -->2s * 2s * gf_dim/2
-             apply(tf.image.resize_nearest_neighbor, [self.s * 2, self.s * 2]).
-             custom_conv2d(self.gf_dim // 2, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             # custom_deconv2d([0, self.s * 4, self.s * 4, self.gf_dim // 4], k_h=4, k_w=4).  # -->4s * 4s * gf_dim//4
-             apply(tf.image.resize_nearest_neighbor, [self.s * 4, self.s * 4]).
-             custom_conv2d(self.gf_dim // 4, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1).  # -->4s * 4s * 3
-             apply(tf.nn.tanh))
+    def hr_generator(self, x_c_code, training=True):  # input: x_c_code -->s4 * s4 * gf_dim*4
+        output_tensor = UpSample(x_c_code, size=[self.s2, self.s2], method=1, align_corners=False,
+                                 name='hr_gen/upsample')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim * 2, 1, 1, name='hr_gen/conv2d')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        output_tensor = UpSample(output_tensor, size=[self.s, self.s], method=1, align_corners=False,
+                                 name='hr_gen/upsample2')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim, 1, 1, name='hr_gen/conv2d2')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm2', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        output_tensor = UpSample(output_tensor, size=[self.s * 2, self.s * 2], method=1, align_corners=False,
+                                 name='hr_gen/upsample3')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim // 2, 1, 1, name='hr_gen/conv2d3')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm3', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        output_tensor = UpSample(output_tensor, size=[self.s * 4, self.s * 4], method=1, align_corners=False,
+                                 name='hr_gen/upsample4')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim // 4, 1, 1, name='hr_gen/conv2d4')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm4', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        # -->4s * 4s * 3
+        output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, name='hr_gen/conv2d5', activation_fn=tf.nn.tanh)
+
         return output_tensor
 
-    def hr_get_generator(self, x_var, c_code):
+    def hr_get_generator(self, x_var, c_code, is_training):
         if cfg.GAN.NETWORK_TYPE == "default":
             # image x_var: self.s * self.s * 3
-            x_code = self.hr_g_encode_image(x_var)  # -->s4 * s4 * gf_dim * 4
+            x_code = self.hr_g_encode_image(x_var, training=is_training)  # -->s4 * s4 * gf_dim * 4
             # text c_code: ef_dim
             c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
             c_code = tf.tile(c_code, [1, self.s4, self.s4, 1])
             # combine both --> s4 * s4 * (ef_dim+gf_dim*4)
-            x_c_code = tf.concat(3, [x_code, c_code])
+            x_c_code = tf.concat([x_code, c_code], 3)
             # Joint learning from text and image -->s4 * s4 * gf_dim * 4
-            node0 = self.hr_g_joint_img_text(x_c_code)
-            node1 = self.residual_block(node0)
-            node2 = self.residual_block(node1)
-            node3 = self.residual_block(node2)
-            node4 = self.residual_block(node3)
+            node0 = self.hr_g_joint_img_text(x_c_code, training=is_training)
+            node1 = self.residual_block(node0, 'node1_resid_block', training=is_training)
+            node2 = self.residual_block(node1, 'node2_resid_block', training=is_training)
+            node3 = self.residual_block(node2, 'node3_resid_block', training=is_training)
+            node4 = self.residual_block(node3, 'node4_resid_block', training=is_training)
             # Up-sampling
-            return self.hr_generator(node4)  # -->4s * 4s * 3
+            return self.hr_generator(node4, training=is_training)  # -->4s * 4s * 3
         else:
             raise NotImplementedError
 
     # structure shared by d and hr_d
     # d and hr_d build this structure separately and do not share parameters
-    def context_embedding(self):
-        template = (pt.template("input").
-                    custom_fully_connected(self.ef_dim).
- apply(leaky_rectify, leakiness=0.2)) + def context_embedding(self, inputs=None, if_reuse=None): + template = fc(inputs, self.ef_dim, 'd_embedd/fc', activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + return template - def discriminator(self): - template = \ - (pt.template("input"). # s16 * s16 * 128*9 - custom_conv2d(self.df_dim * 8, k_h=1, k_w=1, d_h=1, d_w=1). # s16 * s16 * 128*8 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - # custom_fully_connected(1)) - custom_conv2d(1, k_h=self.s16, k_w=self.s16, d_h=self.s16, d_w=self.s16)) + def discriminator(self, training=True, inputs=None, if_reuse=None): + template = Conv2d(inputs, 1, 1, self.df_dim * 8, 1, 1, name='d_template/conv2d', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + template = Conv2d(template, self.s16, self.s16, 1, self.s16, self.s16, name='d_template/conv2d2', + reuse=if_reuse) return template # d-net - def d_encode_image(self): - node1_0 = \ - (pt.template("input"). # s * s * 3 - custom_conv2d(self.df_dim, k_h=4, k_w=4). # s2 * s2 * df_dim - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=4, k_w=4). # s4 * s4 * df_dim*2 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 4, k_h=4, k_w=4). # s8 * s8 * df_dim*4 - conv_batch_norm(). - custom_conv2d(self.df_dim * 8, k_h=4, k_w=4). # s16 * s16 * df_dim*8 - conv_batch_norm()) - node1_1 = \ - (node1_0. - custom_conv2d(self.df_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - - node1 = \ - (node1_0. - apply(tf.add, node1_1). 
- apply(leaky_rectify, leakiness=0.2)) + def d_encode_image(self, inputs=None, training=True, if_reuse=None): + # input: s * s * 3 + node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu, name='d_n1.0/conv2d', + reuse=if_reuse) # s2 * s2 * df_dim + + # s4 * s4 * df_dim*2 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='d_n1.0/conv2d2', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + # s8 * s8 * df_dim*4 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='d_n1.0/conv2d3', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm2', is_training=training, reuse=if_reuse) + # s16 * s16 * df_dim*8 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='d_n1.0/conv2d4', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm3', is_training=training, reuse=if_reuse) + + node1_1 = Conv2d(node1_0, 1, 1, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d2', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 8, 1, 1, name='d_n1.1/conv2d3', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm3', is_training=training, reuse=if_reuse) + + node1 = add([node1_0, node1_1], name='d_n1/add') + node1 = tf.nn.leaky_relu(node1) return node1 - def get_discriminator(self, x_var, c_var): - x_code = self.d_image_template.construct(input=x_var) # s16 * s16 * df_dim*8 + def get_discriminator(self, x_var, c_var, is_training, no_reuse=None): + if cfg.GAN.NETWORK_TYPE == "default": + x_code = self.d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse) # s16 * s16 * df_dim*8 - c_code = self.d_context_template.construct(input=c_var) - c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1) - c_code = tf.tile(c_code, [1, self.s16, self.s16, 1]) # s16 * s16 * ef_dim + c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse) + c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1) + c_code = tf.tile(c_code, [1, self.s16, self.s16, 1]) # s16 * s16 * ef_dim - x_c_code = tf.concat(3, [x_code, c_code]) - return self.d_discriminator_template.construct(input=x_c_code) + x_c_code = tf.concat([x_code, c_code], 3) + return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse) + else: + raise NotImplementedError # hr_d_net - def hr_d_encode_image(self): - node1_0 = \ - (pt.template("input"). # 4s * 4s * 3 - custom_conv2d(self.df_dim, k_h=4, k_w=4). # 2s * 2s * df_dim - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=4, k_w=4). # s * s * df_dim*2 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 4, k_h=4, k_w=4). # s2 * s2 * df_dim*4 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=4, k_w=4). # s4 * s4 * df_dim*8 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 16, k_h=4, k_w=4). # s8 * s8 * df_dim*16 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 32, k_h=4, k_w=4). 
# s16 * s16 * df_dim*32 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 16, k_h=1, k_w=1, d_h=1, d_w=1). # s16 * s16 * df_dim*16 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=1, k_w=1, d_h=1, d_w=1). # s16 * s16 * df_dim*8 - conv_batch_norm()) - node1_1 = \ - (node1_0. - custom_conv2d(self.df_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - - node1 = \ - (node1_0. - apply(tf.add, node1_1). - apply(leaky_rectify, leakiness=0.2)) + def hr_d_encode_image(self, inputs=None, training=True, if_reuse=None): + # input: 4s * 4s * 3 + node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu, + name='hr_d_encode_n1.0/conv2d1', reuse=if_reuse) # 2s * 2s * df_dim + + # s * s * df_dim*2 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='hr_d_encode_n1.0/conv2d2', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + # s2 * s2 * df_dim*4 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='hr_d_encode_n1.0/conv2d3', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + # s4 * s4 * df_dim*8 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='hr_d_encode_n1.0/conv2d4', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm3', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + # s8 * s8 * df_dim*16 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 16, 2, 2, name='hr_d_encode_n1.0/conv2d5', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm4', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + # s16 * s16 * df_dim*32 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 32, 2, 2, name='hr_d_encode_n1.0/conv2d6', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm5', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + # s16 * s16 * df_dim*16 + node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 16, 1, 1, name='hr_d_encode_n1.0/conv2d7', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm6', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + # s16 * s16 * df_dim*8 + node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 8, 1, 1, name='hr_d_encode_n1.0/conv2d8', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm7', is_training=training, + reuse=if_reuse) + + node1_1 = Conv2d(node1_0, 1, 1, self.df_dim * 2, 1, 1, name='hr_d_encode_n1.1/conv2d', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'hr_d_encode_n1.1/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 2, 1, 1, name='hr_d_encode_n1.1/conv2d2', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'hr_d_encode_n1.1/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 8, 1, 1, 
name='hr_d_encode_n1.1/conv2d3', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'hr_d_encode_n1.1/batch_norm3', is_training=training, + reuse=if_reuse) + + node1 = add([node1_0, node1_1], name='hr_d_encode_n1/add') + node1 = tf.nn.leaky_relu(node1) return node1 - def hr_get_discriminator(self, x_var, c_var): - x_code = self.hr_d_image_template.construct(input=x_var) # s16 * s16 * df_dim*8 + def hr_get_discriminator(self, x_var, c_var, is_training, no_reuse=None): + if cfg.GAN.NETWORK_TYPE == "default": + # s16 * s16 * df_dim*8 + x_code = self.hr_d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse) - c_code = self.hr_d_context_template.construct(input=c_var) - c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1) - c_code = tf.tile(c_code, [1, self.s16, self.s16, 1]) # s16 * s16 * ef_dim + c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse) + c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1) + c_code = tf.tile(c_code, [1, self.s16, self.s16, 1]) # s16 * s16 * ef_dim - x_c_code = tf.concat(3, [x_code, c_code]) - return self.hr_discriminator_template.construct(input=x_c_code) + x_c_code = tf.concat([x_code, c_code], 3) + return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse) + else: + raise NotImplementedError diff --git a/stageII/run_exp.py b/stageII/run_exp.py index 1ab9a7d..1285112 100644 --- a/stageII/run_exp.py +++ b/stageII/run_exp.py @@ -1,28 +1,26 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf -import dateutil import dateutil.tz import datetime import argparse import pprint -from misc.datasets import TextDataset -from stageII.model import CondGAN -from stageII.trainer import CondGANTrainer -from misc.utils import mkdir_p -from misc.config import cfg, cfg_from_file +import sys +sys.path.append('misc') +sys.path.append('stageII') + +from datasets import TextDataset +from utils import mkdir_p +from config import cfg, cfg_from_file +from model import CondGAN +from trainer import CondGANTrainer def parse_args(): parser = argparse.ArgumentParser(description='Train a GAN network') - parser.add_argument('--cfg', dest='cfg_file', - help='optional config file', - default=None, type=str) - parser.add_argument('--gpu', dest='gpu_id', - help='GPU device id to use [0]', - default=-1, type=int) + parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str) + parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int) # if len(sys.argv) == 1: # parser.print_help() # sys.exit(1) @@ -49,23 +47,14 @@ def parse_args(): if cfg.TRAIN.FLAG: filename_train = '%s/train' % (datadir) dataset.train = dataset.get_data(filename_train) - ckt_logs_dir = "ckt_logs/%s/%s_%s" % \ - (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) + ckt_logs_dir = "ckt_logs/%s/%s_%s" % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) mkdir_p(ckt_logs_dir) else: s_tmp = cfg.TRAIN.PRETRAINED_MODEL ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')] + model = CondGAN(lr_imsize=int(dataset.image_shape[0] / dataset.hr_lr_ratio), hr_lr_ratio=dataset.hr_lr_ratio) - model = CondGAN( - lr_imsize=int(dataset.image_shape[0] / dataset.hr_lr_ratio), - hr_lr_ratio=dataset.hr_lr_ratio - ) - - algo = CondGANTrainer( - model=model, - dataset=dataset, - ckt_logs_dir=ckt_logs_dir - ) + algo = CondGANTrainer(model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir) if cfg.TRAIN.FLAG: algo.train() diff --git a/stageII/trainer.py b/stageII/trainer.py index 
457405c..3899f01 100644 --- a/stageII/trainer.py +++ b/stageII/trainer.py @@ -1,19 +1,20 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt import tensorflow as tf import numpy as np -import scipy.misc +import imageio import os -import sys from six.moves import range from progressbar import ETA, Bar, Percentage, ProgressBar from PIL import Image, ImageDraw, ImageFont +import sys +sys.path.append('misc') -from misc.config import cfg -from misc.utils import mkdir_p +from config import cfg +from utils import mkdir_p +from skimage.transform import resize TINY = 1e-8 @@ -27,12 +28,7 @@ def KL_loss(mu, log_sigma): class CondGANTrainer(object): - def __init__(self, - model, - dataset=None, - exp_name="model", - ckt_logs_dir="ckt_logs", - ): + def __init__(self, model, dataset=None, exp_name="model", ckt_logs_dir="ckt_logs"): """ :type model: RegularizedGAN """ @@ -51,39 +47,24 @@ def __init__(self, self.hr_image_shape = self.dataset.image_shape ratio = self.dataset.hr_lr_ratio - self.lr_image_shape = [int(self.hr_image_shape[0] / ratio), - int(self.hr_image_shape[1] / ratio), + self.lr_image_shape = [int(self.hr_image_shape[0] / ratio), int(self.hr_image_shape[1] / ratio), self.hr_image_shape[2]] print('hr_image_shape', self.hr_image_shape) print('lr_image_shape', self.lr_image_shape) def build_placeholder(self): '''Helper function for init_opt''' - self.hr_images = tf.placeholder( - tf.float32, [self.batch_size] + self.hr_image_shape, - name='real_hr_images') - self.hr_wrong_images = tf.placeholder( - tf.float32, [self.batch_size] + self.hr_image_shape, - name='wrong_hr_images' - ) - self.embeddings = tf.placeholder( - tf.float32, [self.batch_size] + self.dataset.embedding_shape, - name='conditional_embeddings' - ) - - self.generator_lr = tf.placeholder( - tf.float32, [], - name='generator_learning_rate' - ) - self.discriminator_lr = tf.placeholder( - tf.float32, [], - name='discriminator_learning_rate' - ) + self.hr_images = tf.placeholder(tf.float32, [self.batch_size] + self.hr_image_shape, name='real_hr_images') + self.hr_wrong_images = tf.placeholder(tf.float32, [self.batch_size] + self.hr_image_shape, + name='wrong_hr_images') + self.embeddings = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.embedding_shape, + name='conditional_embeddings') + + self.generator_lr = tf.placeholder(tf.float32, [], name='generator_learning_rate') + self.discriminator_lr = tf.placeholder(tf.float32, [], name='discriminator_learning_rate') # - self.images = tf.image.resize_bilinear(self.hr_images, - self.lr_image_shape[:2]) - self.wrong_images = tf.image.resize_bilinear(self.hr_wrong_images, - self.lr_image_shape[:2]) + self.images = tf.image.resize_bilinear(self.hr_images, self.lr_image_shape[:2]) + self.wrong_images = tf.image.resize_bilinear(self.hr_wrong_images, self.lr_image_shape[:2]) def sample_encoded_context(self, embeddings): '''Helper function for init_opt''' @@ -107,95 +88,73 @@ def sample_encoded_context(self, embeddings): def init_opt(self): self.build_placeholder() - with pt.defaults_scope(phase=pt.Phase.train): - # ####get output from G network#################################### - with tf.variable_scope("g_net"): - c, kl_loss = self.sample_encoded_context(self.embeddings) - z = tf.random_normal([self.batch_size, cfg.Z_DIM]) - self.log_vars.append(("hist_c", c)) - self.log_vars.append(("hist_z", z)) - fake_images = self.model.get_generator(tf.concat(1, [c, z])) - + # ####get output from G network#################################### + 
with tf.variable_scope("g_net"): # For training + c, kl_loss = self.sample_encoded_context(self.embeddings) + z = tf.random_normal([self.batch_size, cfg.Z_DIM]) + self.log_vars.append(("hist_c", c)) + self.log_vars.append(("hist_z", z)) + fake_images = self.model.get_generator(tf.concat([c, z], 1), True) + with tf.variable_scope("d_net"): # For training # ####get discriminator_loss and generator_loss ################### - discriminator_loss, generator_loss =\ - self.compute_losses(self.images, - self.wrong_images, - fake_images, - self.embeddings, - flag='lr') + discriminator_loss, generator_loss = self.compute_losses(self.images, self.wrong_images, fake_images, + self.embeddings, flag='lr') generator_loss += kl_loss self.log_vars.append(("g_loss_kl_loss", kl_loss)) self.log_vars.append(("g_loss", generator_loss)) self.log_vars.append(("d_loss", discriminator_loss)) - # #### For hr_g and hr_d ######################################### - with tf.variable_scope("hr_g_net"): - hr_c, hr_kl_loss = self.sample_encoded_context(self.embeddings) - self.log_vars.append(("hist_hr_c", hr_c)) - hr_fake_images = self.model.hr_get_generator(fake_images, hr_c) + # #### For hr_g and hr_d ######################################### + with tf.variable_scope("hr_g_net"): # For training + hr_c, hr_kl_loss = self.sample_encoded_context(self.embeddings) + self.log_vars.append(("hist_hr_c", hr_c)) + hr_fake_images = self.model.hr_get_generator(fake_images, hr_c, True) + + with tf.variable_scope("hr_d_net"): # For training # get losses - hr_discriminator_loss, hr_generator_loss =\ - self.compute_losses(self.hr_images, - self.hr_wrong_images, - hr_fake_images, - self.embeddings, - flag='hr') + hr_discriminator_loss, hr_generator_loss = self.compute_losses(self.hr_images, self.hr_wrong_images, + hr_fake_images, self.embeddings, flag='hr') hr_generator_loss += hr_kl_loss self.log_vars.append(("hr_g_loss", hr_generator_loss)) self.log_vars.append(("hr_d_loss", hr_discriminator_loss)) # #######define self.g_sum, self.d_sum,....######################## - self.prepare_trainer(discriminator_loss, generator_loss, - hr_discriminator_loss, hr_generator_loss) + self.prepare_trainer(discriminator_loss, generator_loss, hr_discriminator_loss, hr_generator_loss) self.define_summaries() - with pt.defaults_scope(phase=pt.Phase.test): - self.sampler() - self.visualization(cfg.TRAIN.NUM_COPY) - print("success") + self.sampler() + self.visualization(cfg.TRAIN.NUM_COPY) + print("success") def sampler(self): - with tf.variable_scope("g_net", reuse=True): + with tf.variable_scope("g_net", reuse=True): # For testing c, _ = self.sample_encoded_context(self.embeddings) z = tf.random_normal([self.batch_size, cfg.Z_DIM]) - self.fake_images = self.model.get_generator(tf.concat(1, [c, z])) - with tf.variable_scope("hr_g_net", reuse=True): + self.fake_images = self.model.get_generator(tf.concat([c, z], 1), False) + with tf.variable_scope("hr_g_net", reuse=True): # For testing hr_c, _ = self.sample_encoded_context(self.embeddings) - self.hr_fake_images =\ - self.model.hr_get_generator(self.fake_images, hr_c) + self.hr_fake_images = self.model.hr_get_generator(self.fake_images, hr_c, False) - def compute_losses(self, images, wrong_images, - fake_images, embeddings, flag='lr'): + def compute_losses(self, images, wrong_images, fake_images, embeddings, flag='lr'): if flag == 'lr': - real_logit =\ - self.model.get_discriminator(images, embeddings) - wrong_logit =\ - self.model.get_discriminator(wrong_images, embeddings) - fake_logit =\ - 
self.model.get_discriminator(fake_images, embeddings) + real_logit = self.model.get_discriminator(images, embeddings, True) + # Reuse the weights created by the real_logit call above + wrong_logit = self.model.get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE) + fake_logit = self.model.get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE) else: - real_logit =\ - self.model.hr_get_discriminator(images, embeddings) - wrong_logit =\ - self.model.hr_get_discriminator(wrong_images, embeddings) - fake_logit =\ - self.model.hr_get_discriminator(fake_images, embeddings) - - real_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(real_logit, - tf.ones_like(real_logit)) + real_logit = self.model.hr_get_discriminator(images, embeddings, True) + # Reuse the weights created by the real_logit call above + wrong_logit = self.model.hr_get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE) + fake_logit = self.model.hr_get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE) + + real_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=real_logit, labels=tf.ones_like(real_logit)) real_d_loss = tf.reduce_mean(real_d_loss) - wrong_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(wrong_logit, - tf.zeros_like(wrong_logit)) + wrong_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=wrong_logit, labels=tf.zeros_like(wrong_logit)) wrong_d_loss = tf.reduce_mean(wrong_d_loss) - fake_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(fake_logit, - tf.zeros_like(fake_logit)) + fake_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.zeros_like(fake_logit)) fake_d_loss = tf.reduce_mean(fake_d_loss) if cfg.TRAIN.B_WRONG: - discriminator_loss =\ - real_d_loss + (wrong_d_loss + fake_d_loss) / 2. + discriminator_loss = real_d_loss + (wrong_d_loss + fake_d_loss) / 2.
else: discriminator_loss = real_d_loss + fake_d_loss if flag == 'lr': @@ -209,9 +168,7 @@ def compute_losses(self, images, wrong_images, if cfg.TRAIN.B_WRONG: self.log_vars.append(("hr_d_loss_wrong", wrong_d_loss)) - generator_loss = \ - tf.nn.sigmoid_cross_entropy_with_logits(fake_logit, - tf.ones_like(fake_logit)) + generator_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.ones_like(fake_logit)) generator_loss = tf.reduce_mean(generator_loss) if flag == 'lr': self.log_vars.append(("g_loss_fake", generator_loss)) @@ -223,37 +180,25 @@ def compute_losses(self, images, wrong_images, def define_one_trainer(self, loss, learning_rate, key_word): '''Helper function for init_opt''' all_vars = tf.trainable_variables() - tarin_vars = [var for var in all_vars if - var.name.startswith(key_word)] + train_vars = [var for var in all_vars if var.name.startswith(key_word)] + + # Collect the update ops (batch-norm moving mean/variance) that belong to this sub-network + update_ops = [op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if op.name.startswith(key_word)] + # Run them alongside each optimizer step so the moving statistics stay current + with tf.control_dependencies(update_ops): + opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5) + trainer = opt.minimize(loss, var_list=train_vars) - opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5) - trainer = pt.apply_optimizer(opt, losses=[loss], var_list=tarin_vars) return trainer - def prepare_trainer(self, discriminator_loss, generator_loss, - hr_discriminator_loss, hr_generator_loss): + def prepare_trainer(self, discriminator_loss, generator_loss, hr_discriminator_loss, hr_generator_loss): ft_lr_retio = cfg.TRAIN.FT_LR_RETIO - self.discriminator_trainer =\ - self.define_one_trainer(discriminator_loss, - self.discriminator_lr * ft_lr_retio, - 'd_') - self.generator_trainer =\ - self.define_one_trainer(generator_loss, - self.generator_lr * ft_lr_retio, - 'g_') - self.hr_discriminator_trainer =\ - self.define_one_trainer(hr_discriminator_loss, - self.discriminator_lr, - 'hr_d_') - self.hr_generator_trainer =\ - self.define_one_trainer(hr_generator_loss, - self.generator_lr, - 'hr_g_') - - self.ft_generator_trainer = \ - self.define_one_trainer(hr_generator_loss, - self.generator_lr * cfg.TRAIN.FT_LR_RETIO, - 'g_') + self.discriminator_trainer = self.define_one_trainer(discriminator_loss, self.discriminator_lr * ft_lr_retio, 'd_') + self.generator_trainer = self.define_one_trainer(generator_loss, self.generator_lr * ft_lr_retio, 'g_') + self.hr_discriminator_trainer = self.define_one_trainer(hr_discriminator_loss, self.discriminator_lr, 'hr_d_') + self.hr_generator_trainer = self.define_one_trainer(hr_generator_loss, self.generator_lr, 'hr_g_') + + self.ft_generator_trainer = self.define_one_trainer(hr_generator_loss, self.generator_lr * cfg.TRAIN.FT_LR_RETIO, 'g_') self.log_vars.append(("hr_d_learning_rate", self.discriminator_lr)) self.log_vars.append(("hr_g_learning_rate", self.generator_lr)) @@ -263,21 +208,21 @@ def define_summaries(self): all_sum = {'g': [], 'd': [], 'hr_g': [], 'hr_d': [], 'hist': []} for k, v in self.log_vars: if k.startswith('g'): - all_sum['g'].append(tf.scalar_summary(k, v)) + all_sum['g'].append(tf.summary.scalar(k, v)) elif k.startswith('d'): - all_sum['d'].append(tf.scalar_summary(k, v)) + all_sum['d'].append(tf.summary.scalar(k, v)) elif k.startswith('hr_g'): - all_sum['hr_g'].append(tf.scalar_summary(k, v)) + all_sum['hr_g'].append(tf.summary.scalar(k, v)) elif k.startswith('hr_d'): - all_sum['hr_d'].append(tf.scalar_summary(k, v)) +
all_sum['hr_d'].append(tf.summary.scalar(k, v)) elif k.startswith('hist'): - all_sum['hist'].append(tf.histogram_summary(k, v)) + all_sum['hist'].append(tf.summary.histogram(k, v)) - self.g_sum = tf.merge_summary(all_sum['g']) - self.d_sum = tf.merge_summary(all_sum['d']) - self.hr_g_sum = tf.merge_summary(all_sum['hr_g']) - self.hr_d_sum = tf.merge_summary(all_sum['hr_d']) - self.hist_sum = tf.merge_summary(all_sum['hist']) + self.g_sum = tf.summary.merge(all_sum['g']) + self.d_sum = tf.summary.merge(all_sum['d']) + self.hr_g_sum = tf.summary.merge(all_sum['hr_g']) + self.hr_d_sum = tf.summary.merge(all_sum['hr_d']) + self.hist_sum = tf.summary.merge(all_sum['hist']) def visualize_one_superimage(self, img_var, images, rows, filename): stacked_img = [] @@ -287,35 +232,27 @@ def visualize_one_superimage(self, img_var, images, rows, filename): for col in range(rows): row_img.append(img_var[row * rows + col, :, :, :]) # each rows is 1realimage +10_fakeimage - stacked_img.append(tf.concat(1, row_img)) - imgs = tf.expand_dims(tf.concat(0, stacked_img), 0) - current_img_summary = tf.image_summary(filename, imgs) + stacked_img.append(tf.concat(row_img, 1)) + imgs = tf.expand_dims(tf.concat(stacked_img, 0), 0) + current_img_summary = tf.summary.image(filename, imgs) return current_img_summary, imgs def visualization(self, n): - fake_sum_train, superimage_train =\ - self.visualize_one_superimage(self.fake_images[:n * n], - self.images[:n * n], - n, "train") - fake_sum_test, superimage_test =\ - self.visualize_one_superimage(self.fake_images[n * n:2 * n * n], - self.images[n * n:2 * n * n], - n, "test") - self.superimages = tf.concat(0, [superimage_train, superimage_test]) - self.image_summary = tf.merge_summary([fake_sum_train, fake_sum_test]) - - hr_fake_sum_train, hr_superimage_train =\ - self.visualize_one_superimage(self.hr_fake_images[:n * n], - self.hr_images[:n * n, :, :, :], - n, "hr_train") - hr_fake_sum_test, hr_superimage_test =\ - self.visualize_one_superimage(self.hr_fake_images[n * n:2 * n * n], - self.hr_images[n * n:2 * n * n], - n, "hr_test") - self.hr_superimages =\ - tf.concat(0, [hr_superimage_train, hr_superimage_test]) - self.hr_image_summary =\ - tf.merge_summary([hr_fake_sum_train, hr_fake_sum_test]) + fake_sum_train, superimage_train = self.visualize_one_superimage(self.fake_images[:n * n], self.images[:n * n], + n, "train") + fake_sum_test, superimage_test = self.visualize_one_superimage(self.fake_images[n * n:2 * n * n], + self.images[n * n:2 * n * n], n, "test") + self.superimages = tf.concat([superimage_train, superimage_test], 0) + self.image_summary = tf.summary.merge([fake_sum_train, fake_sum_test]) + + hr_fake_sum_train, hr_superimage_train = self.visualize_one_superimage(self.hr_fake_images[:n * n], + self.hr_images[:n * n, :, :, :], n, + "hr_train") + hr_fake_sum_test, hr_superimage_test = self.visualize_one_superimage(self.hr_fake_images[n * n:2 * n * n], + self.hr_images[n * n:2 * n * n], n, + "hr_test") + self.hr_superimages = tf.concat([hr_superimage_train, hr_superimage_test], 0) + self.hr_image_summary = tf.summary.merge([hr_fake_sum_train, hr_fake_sum_test]) def preprocess(self, x, n): # make sure every row with n column have the same embeddings @@ -325,43 +262,33 @@ def preprocess(self, x, n): return x def epoch_sum_images(self, sess, n): - images_train, _, embeddings_train, captions_train, _ =\ - self.dataset.train.next_batch(n * n, cfg.TRAIN.NUM_EMBEDDING) + images_train, _, embeddings_train, captions_train, _ = self.dataset.train.next_batch(n * n, + 
cfg.TRAIN.NUM_EMBEDDING) images_train = self.preprocess(images_train, n) embeddings_train = self.preprocess(embeddings_train, n) - images_test, _, embeddings_test, captions_test, _ =\ - self.dataset.test.next_batch(n * n, 1) + images_test, _, embeddings_test, captions_test, _ = self.dataset.test.next_batch(n * n, 1) images_test = self.preprocess(images_test, n) embeddings_test = self.preprocess(embeddings_test, n) images = np.concatenate([images_train, images_test], axis=0) - embeddings =\ - np.concatenate([embeddings_train, embeddings_test], axis=0) + embeddings = np.concatenate([embeddings_train, embeddings_test], axis=0) if self.batch_size > 2 * n * n: - images_pad, _, embeddings_pad, _, _ =\ - self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1) + images_pad, _, embeddings_pad, _, _ = self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1) images = np.concatenate([images, images_pad], axis=0) embeddings = np.concatenate([embeddings, embeddings_pad], axis=0) - feed_out = [self.superimages, self.image_summary, - self.hr_superimages, self.hr_image_summary] - feed_dict = {self.hr_images: images, - self.embeddings: embeddings} - gen_samples, img_summary, hr_gen_samples, hr_img_summary =\ - sess.run(feed_out, feed_dict) + feed_out = [self.superimages, self.image_summary, self.hr_superimages, self.hr_image_summary] + feed_dict = {self.hr_images: images, self.embeddings: embeddings} + gen_samples, img_summary, hr_gen_samples, hr_img_summary = sess.run(feed_out, feed_dict) # save images generated for train and test captions - scipy.misc.imsave('%s/lr_fake_train.jpg' % - (self.log_dir), gen_samples[0]) - scipy.misc.imsave('%s/lr_fake_test.jpg' % - (self.log_dir), gen_samples[1]) + imageio.imwrite('%s/lr_fake_train.jpg' % (self.log_dir), gen_samples[0]) + imageio.imwrite('%s/lr_fake_test.jpg' % (self.log_dir), gen_samples[1]) # - scipy.misc.imsave('%s/hr_fake_train.jpg' % - (self.log_dir), hr_gen_samples[0]) - scipy.misc.imsave('%s/hr_fake_test.jpg' % - (self.log_dir), hr_gen_samples[1]) + imageio.imwrite('%s/hr_fake_train.jpg' % (self.log_dir), hr_gen_samples[0]) + imageio.imwrite('%s/hr_fake_test.jpg' % (self.log_dir), hr_gen_samples[1]) # pfi_train = open(self.log_dir + "/train.txt", "w") pfi_test = open(self.log_dir + "/test.txt", "w") @@ -378,17 +305,11 @@ def epoch_sum_images(self, sess, n): def build_model(self, sess): self.init_opt() - - sess.run(tf.initialize_all_variables()) + sess.run(tf.global_variables_initializer()) if len(self.model_path) > 0: print("Reading model parameters from %s" % self.model_path) all_vars = tf.trainable_variables() - # all_vars = tf.all_variables() - restore_vars = [] - for var in all_vars: - if var.name.startswith('g_') or var.name.startswith('d_'): - restore_vars.append(var) - # print(var.name) + restore_vars = [var for var in all_vars if var.name.startswith('g_') or var.name.startswith('d_')] saver = tf.train.Saver(restore_vars) saver.restore(sess, self.model_path) @@ -401,56 +322,46 @@ def build_model(self, sess): counter = 0 return counter - def train_one_step(self, generator_lr, - discriminator_lr, - counter, summary_writer, log_vars, sess): + def train_one_step(self, generator_lr, discriminator_lr, counter, summary_writer, log_vars, sess): # training d - hr_images, hr_wrong_images, embeddings, _, _ =\ - self.dataset.train.next_batch(self.batch_size, - cfg.TRAIN.NUM_EMBEDDING) + hr_images, hr_wrong_images, embeddings, _, _ = self.dataset.train.next_batch(self.batch_size, + cfg.TRAIN.NUM_EMBEDDING) feed_dict = {self.hr_images: hr_images, 
self.hr_wrong_images: hr_wrong_images, self.embeddings: embeddings, self.generator_lr: generator_lr, - self.discriminator_lr: discriminator_lr - } + self.discriminator_lr: discriminator_lr} if cfg.TRAIN.FINETUNE_LR: # train d1 - feed_out_d = [self.hr_discriminator_trainer, - self.hr_d_sum, - log_vars, - self.hist_sum] + feed_out_d = [self.hr_discriminator_trainer, self.hr_d_sum, log_vars, self.hist_sum] ret_list = sess.run(feed_out_d, feed_dict) summary_writer.add_summary(ret_list[1], counter) log_vals = ret_list[2] summary_writer.add_summary(ret_list[3], counter) + # train g1 and finetune g0 with the loss of g1 - feed_out_g = [self.hr_generator_trainer, - self.ft_generator_trainer, - self.hr_g_sum] + feed_out_g = [self.hr_generator_trainer, self.ft_generator_trainer, self.hr_g_sum] _, _, hr_g_sum = sess.run(feed_out_g, feed_dict) summary_writer.add_summary(hr_g_sum, counter) + # finetune d0 with the loss of d0 feed_out_d = [self.discriminator_trainer, self.d_sum] _, d_sum = sess.run(feed_out_d, feed_dict) summary_writer.add_summary(d_sum, counter) + # finetune g0 with the loss of g0 feed_out_g = [self.generator_trainer, self.g_sum] _, g_sum = sess.run(feed_out_g, feed_dict) summary_writer.add_summary(g_sum, counter) else: # train d1 - feed_out_d = [self.hr_discriminator_trainer, - self.hr_d_sum, - log_vars, - self.hist_sum] + feed_out_d = [self.hr_discriminator_trainer, self.hr_d_sum, log_vars, self.hist_sum] ret_list = sess.run(feed_out_d, feed_dict) summary_writer.add_summary(ret_list[1], counter) log_vals = ret_list[2] summary_writer.add_summary(ret_list[3], counter) # train g1 - feed_out_g = [self.hr_generator_trainer, - self.hr_g_sum] + feed_out_g = [self.hr_generator_trainer, self.hr_g_sum] _, hr_g_sum = sess.run(feed_out_g, feed_dict) summary_writer.add_summary(hr_g_sum, counter) @@ -461,12 +372,10 @@ def train(self): with tf.Session(config=config) as sess: with tf.device("/gpu:%d" % cfg.GPU_ID): counter = self.build_model(sess) - saver = tf.train.Saver(tf.all_variables(), - keep_checkpoint_every_n_hours=5) + saver = tf.train.Saver(tf.global_variables(), keep_checkpoint_every_n_hours=5) # summary_op = tf.merge_all_summaries() - summary_writer = tf.train.SummaryWriter(self.log_dir, - sess.graph) + summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) if cfg.TRAIN.FINETUNE_LR: keys = ["hr_d_loss", "hr_g_loss", "d_loss", "g_loss"] @@ -487,10 +396,8 @@ def train(self): decay_start = cfg.TRAIN.PRETRAINED_EPOCH epoch_start = int(counter / updates_per_epoch) for epoch in range(epoch_start, self.max_epoch): - widgets = ["epoch #%d|" % epoch, - Percentage(), Bar(), ETA()] - pbar = ProgressBar(maxval=updates_per_epoch, - widgets=widgets) + widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()] + pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets) pbar.start() if epoch % lr_decay_step == 0 and epoch > decay_start: @@ -500,23 +407,17 @@ def train(self): all_log_vals = [] for i in range(updates_per_epoch): pbar.update(i) - log_vals = self.train_one_step(generator_lr, - discriminator_lr, - counter, summary_writer, + log_vals = self.train_one_step(generator_lr, discriminator_lr, counter, summary_writer, log_vars, sess) all_log_vals.append(log_vals) # save checkpoint counter += 1 if counter % self.snapshot_interval == 0: - snapshot_path = "%s/%s_%s.ckpt" %\ - (self.checkpoint_dir, - self.exp_name, - str(counter)) + snapshot_path = "%s/%s_%s.ckpt" % (self.checkpoint_dir, self.exp_name, str(counter)) fn = saver.save(sess, snapshot_path) print("Model saved in file: %s" % 
fn) - img_summary, img_summary2 =\ - self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY) + img_summary, img_summary2 = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY) summary_writer.add_summary(img_summary, counter) summary_writer.add_summary(img_summary2, counter) @@ -526,9 +427,7 @@ def train(self): dic_logs[k] = v # print(k, v) - log_line = "; ".join("%s: %s" % - (str(k), str(dic_logs[k])) - for k in dic_logs) + log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k])) for k in dic_logs) print("Epoch %d | " % (epoch) + log_line) sys.stdout.flush() if np.any(np.isnan(avg_log_vals)): @@ -559,15 +458,13 @@ def drawCaption(self, img, caption): return img_txt - def save_super_images(self, images, sample_batchs, hr_sample_batchs, - savenames, captions_batchs, - sentenceID, save_dir, subset): + def save_super_images(self, images, sample_batchs, hr_sample_batchs, savenames, captions_batchs, sentenceID, + save_dir, subset): # batch_size samples for each embedding # Up to 16 samples for each text embedding/sentence numSamples = len(sample_batchs) for j in range(len(savenames)): - s_tmp = '%s-1real-%dsamples/%s/%s' %\ - (save_dir, numSamples, subset, savenames[j]) + s_tmp = '%s-1real-%dsamples/%s/%s' % (save_dir, numSamples, subset, savenames[j]) folder = s_tmp[:s_tmp.rfind('/')] if not os.path.isdir(folder): print('Make a new folder: ', folder) @@ -583,9 +480,10 @@ def save_super_images(self, images, sample_batchs, hr_sample_batchs, row2 = [padding0, real_img, padding] for i in range(np.minimum(8, numSamples)): lr_img = sample_batchs[i][j] + lr_img = (lr_img + 1.0) * 127.5 hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 - re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2]) + re_sample = resize(lr_img, hr_img.shape[:2]) row1.append(re_sample) row2.append(hr_img) row1 = np.concatenate(row1, axis=1) @@ -598,38 +496,34 @@ def save_super_images(self, images, sample_batchs, hr_sample_batchs, row2 = [padding0, real_img, padding] for i in range(8, len(sample_batchs)): lr_img = sample_batchs[i][j] + lr_img = (lr_img + 1.0) * 127.5 hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 - re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2]) + re_sample = resize(lr_img, hr_img.shape[:2]) row1.append(re_sample) row2.append(hr_img) row1 = np.concatenate(row1, axis=1) row2 = np.concatenate(row2, axis=1) super_row = np.concatenate([row1, row2], axis=0) superimage2 = np.zeros_like(superimage) - superimage2[:super_row.shape[0], - :super_row.shape[1], - :super_row.shape[2]] = super_row + superimage2[:super_row.shape[0], :super_row.shape[1], :super_row.shape[2]] = super_row mid_padding = np.zeros((64, superimage.shape[1], 3)) - superimage = np.concatenate([superimage, mid_padding, - superimage2], axis=0) + superimage = np.concatenate([superimage, mid_padding, superimage2], axis=0) top_padding = np.zeros((128, superimage.shape[1], 3)) - superimage =\ - np.concatenate([top_padding, superimage], axis=0) + superimage = np.concatenate([top_padding, superimage], axis=0) captions = captions_batchs[j][sentenceID] fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID) superimage = self.drawCaption(np.uint8(superimage), captions) - scipy.misc.imsave(fullpath, superimage) + imageio.imwrite(fullpath, superimage) def eval_one_dataset(self, sess, dataset, save_dir, subset='train'): count = 0 print('num_examples:', dataset._num_examples) while count < dataset._num_examples: start = count % dataset._num_examples - images, embeddings_batchs, savenames, captions_batchs =\ - 
dataset.next_batch_test(self.batch_size, start, 1) + images, embeddings_batchs, savenames, captions_batchs = dataset.next_batch_test(self.batch_size, start, 1) print('count = ', count, 'start = ', start) # the i-th sentence/caption @@ -640,15 +534,12 @@ def eval_one_dataset(self, sess, dataset, save_dir, subset='train'): # with randomness from noise z and conditioning augmentation. numSamples = np.minimum(16, cfg.TRAIN.NUM_COPY) for j in range(numSamples): - hr_samples, samples =\ - sess.run([self.hr_fake_images, self.fake_images], - {self.embeddings: embeddings_batchs[i]}) + hr_samples, samples = sess.run([self.hr_fake_images, self.fake_images], + {self.embeddings: embeddings_batchs[i]}) samples_batchs.append(samples) hr_samples_batchs.append(hr_samples) - self.save_super_images(images, samples_batchs, - hr_samples_batchs, - savenames, captions_batchs, - i, save_dir, subset) + self.save_super_images(images, samples_batchs, hr_samples_batchs, savenames, captions_batchs, i, + save_dir, subset) count += self.batch_size @@ -659,11 +550,10 @@ def evaluate(self): if self.model_path.find('.ckpt') != -1: self.init_opt() print("Reading model parameters from %s" % self.model_path) - saver = tf.train.Saver(tf.all_variables()) + saver = tf.train.Saver(tf.global_variables()) saver.restore(sess, self.model_path) # self.eval_one_dataset(sess, self.dataset.train, # self.log_dir, subset='train') - self.eval_one_dataset(sess, self.dataset.test, - self.log_dir, subset='test') + self.eval_one_dataset(sess, self.dataset.test, self.log_dir, subset='test') else: print("Input a valid model path.")
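The notes below summarize the main migration patterns in this diff, each with a small self-contained sketch. All sketches assume TensorFlow 1.x graph mode; tensor shapes, file paths, and helper names in them are illustrative, not the repo's actual API.

Replacing prettytensor templates (stageII/model.py): a prettytensor template built once and `.construct()`-ed several times shared its weights automatically; with plain layer calls that sharing has to be made explicit, which is what the `if_reuse`/`no_reuse` arguments threaded through `context_embedding`, `d_encode_image`, and `discriminator` do. A minimal sketch of the same mechanism using variable scopes:

```python
import tensorflow as tf

def discriminator(x, reuse=None):
    # Every call must share one set of weights, like a prettytensor template.
    with tf.variable_scope('d_net', reuse=reuse):
        h = tf.layers.dense(x, 64, activation=tf.nn.leaky_relu, name='fc1')
        return tf.layers.dense(h, 1, name='logit')

x_real = tf.random_normal([16, 128])
x_fake = tf.random_normal([16, 128])

real_logit = discriminator(x_real)                       # first call creates the variables
fake_logit = discriminator(x_fake, reuse=tf.AUTO_REUSE)  # later calls reuse them
```

`tf.AUTO_REUSE` (TensorFlow 1.4+) is what `compute_losses` passes for the wrong/fake logits, so all three discriminator evaluations score with identical weights.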
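Two TF 0.x to 1.x breaking changes recur throughout the trainer: `tf.concat` moved the axis from the first argument to the last, and `tf.nn.sigmoid_cross_entropy_with_logits` now requires keyword arguments. A before/after sketch with illustrative shapes:

```python
import tensorflow as tf

c = tf.zeros([64, 128])   # conditioning vector
z = tf.zeros([64, 100])   # noise vector

# TF 0.x: tf.concat(1, [c, z]); TF 1.x puts the axis last.
cz = tf.concat([c, z], 1)                  # shape [64, 228]

logits = tf.zeros([64, 1])
# TF 0.x accepted (logits, labels) positionally; TF 1.x requires keywords.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                            labels=tf.ones_like(logits)))
```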
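`sample_encoded_context`, called from `init_opt` and `sampler`, is StackGAN's conditioning augmentation: the text embedding is mapped to a Gaussian mean and log-sigma, a latent `c` is drawn with the reparameterization trick, and a KL term pulls the distribution toward a standard normal (the `kl_loss` added to `generator_loss` above). A sketch assuming the standard diagonal-Gaussian KL; `mu` and `log_sigma` stand in for the encoder outputs:

```python
import tensorflow as tf

def sample_encoded_context(mu, log_sigma):
    # Reparameterization trick: c = mu + sigma * eps keeps sampling differentiable.
    eps = tf.random_normal(tf.shape(mu))
    c = mu + tf.exp(log_sigma) * eps
    # KL(N(mu, sigma^2) || N(0, I)), averaged over batch and dimensions.
    kl = tf.reduce_mean(-log_sigma + 0.5 * (-1.0 + tf.exp(2.0 * log_sigma) + tf.square(mu)))
    return c, kl
```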
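The new `define_one_trainer` gates each optimizer step on the batch-norm update ops of its own sub-network. With the `conv_batch_normalization` layers, the moving mean/variance updates land in `tf.GraphKeys.UPDATE_OPS`; if they never run, test-time batch norm uses stale statistics. A minimal sketch of the pattern (the loss and scope prefix are placeholders):

```python
import tensorflow as tf

def make_trainer(loss, learning_rate, scope_prefix):
    # Optimize only this sub-network's variables ('d_', 'g_', 'hr_d_', 'hr_g_').
    train_vars = [v for v in tf.trainable_variables()
                  if v.name.startswith(scope_prefix)]
    # Batch norm registers its moving-average updates in UPDATE_OPS; run the
    # ones belonging to this sub-network before every optimizer step.
    update_ops = [op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                  if op.name.startswith(scope_prefix)]
    with tf.control_dependencies(update_ops):
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
        return opt.minimize(loss, var_list=train_vars)
```

Filtering by name prefix is what keeps the alternating GAN updates independent: the discriminator step touches only `d_*` variables and runs only `d_*` update ops, and likewise for the generator.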
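The summary changes are a mechanical rename of the pre-1.0 API: `tf.scalar_summary`, `tf.histogram_summary`, and `tf.image_summary` became `tf.summary.scalar/histogram/image`; `tf.merge_summary` became `tf.summary.merge`; `tf.train.SummaryWriter` became `tf.summary.FileWriter`. End to end (log directory illustrative):

```python
import tensorflow as tf

d_loss = tf.constant(0.5)
scalar_sum = tf.summary.scalar('d_loss', d_loss)                  # was tf.scalar_summary
hist_sum = tf.summary.histogram('hist_z', tf.random_normal([64, 100]))
merged = tf.summary.merge([scalar_sum, hist_sum])                 # was tf.merge_summary

with tf.Session() as sess:
    writer = tf.summary.FileWriter('/tmp/ckt_logs', sess.graph)   # was tf.train.SummaryWriter
    writer.add_summary(sess.run(merged), global_step=0)
    writer.close()
```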
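`scipy.misc.imsave` and `scipy.misc.imresize` were removed from SciPy (1.2+), hence `imageio.imwrite` and `skimage.transform.resize`. One behavioral difference worth flagging: `imresize` returned `uint8`, while `skimage.transform.resize` returns floats and by default follows skimage's [0, 1] float conventions, so after `save_super_images` rescales samples to [0, 255] it may be safer to pass `preserve_range=True` and cast before writing; the hunks above call `resize` without the flag. A sketch under those assumptions:

```python
import numpy as np
import imageio
from skimage.transform import resize

lr_img = np.random.uniform(-1.0, 1.0, size=(64, 64, 3))  # tanh-range generator output
lr_img = (lr_img + 1.0) * 127.5                          # map [-1, 1] -> [0, 255]

# preserve_range keeps the [0, 255] scale instead of renormalizing.
up_img = resize(lr_img, (256, 256), preserve_range=True)

imageio.imwrite('/tmp/lr_fake_sample.jpg', np.uint8(up_img))
```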
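Finally, `build_model` warm-starts Stage-II from a Stage-I checkpoint by giving `tf.train.Saver` an explicit variable list: everything is first initialized with `tf.global_variables_initializer` (the 1.x replacement for `tf.initialize_all_variables`), then only the `g_*`/`d_*` variables are overwritten from disk, leaving the `hr_*` networks at their fresh initialization. A toy sketch with a hypothetical checkpoint path:

```python
import tensorflow as tf

# Toy graph: a Stage-I variable (restored) and a Stage-II variable (kept fresh).
g_w = tf.get_variable('g_net/w', shape=[4], initializer=tf.zeros_initializer())
hr_w = tf.get_variable('hr_g_net/w', shape=[4], initializer=tf.zeros_initializer())

restore_vars = [v for v in tf.trainable_variables()
                if v.name.startswith(('g_', 'd_'))]   # Stage-I weights only
saver = tf.train.Saver(restore_vars)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())       # initialize everything first
    saver.restore(sess, 'models/stageI/model.ckpt')   # hypothetical path
```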