diff --git a/README.md b/README.md index b70fa81..b33ae25 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ +## Python 3 Version of Show, Attend and Tell using Tensorflow +This repo is a Python 3 version of [DeepRNN/image_captioning](https://github.com/DeepRNN/image_captioning), which implements "Show, Attend and Tell: Neural Image Caption Generation with Visual Attention" by Xu et al. (ICML2015). Many thanks to [salaniz's COCO evaluation tools for Python 3](https://github.com/salaniz/pycocoevalcap). I am using: +- Python 3.6 +- Tensorflow 1.8.0 + +#### Original README below + ### Introduction This neural system for image captioning is roughly based on the paper "Show, Attend and Tell: Neural Image Caption Generation with Visual Attention" by Xu et al. (ICML2015). The input is an image, and the output is a sentence describing the content of the image. It uses a convolutional neural network to extract visual features from the image, and uses a LSTM recurrent neural network to decode these features into a sentence. A soft attention mechanism is incorporated to improve the quality of the caption. This project is implemented using the Tensorflow library, and allows end-to-end training of both CNN and RNN parts. diff --git a/base_model.py b/base_model.py index 9e0c58e..f291b60 100644 --- a/base_model.py +++ b/base_model.py @@ -2,8 +2,12 @@ import numpy as np import pandas as pd import tensorflow as tf + +import matplotlib +matplotlib.use('agg') + import matplotlib.pyplot as plt -import cPickle as pickle +import pickle import copy import json from tqdm import tqdm @@ -66,6 +70,7 @@ def eval(self, sess, eval_gt_coco, eval_data, vocabulary): config = self.config results = [] + print('config.eval_result_dir:', config.eval_result_dir) if not os.path.exists(config.eval_result_dir): os.mkdir(config.eval_result_dir) @@ -81,7 +86,7 @@ def eval(self, sess, eval_gt_coco, eval_data, vocabulary): word_idxs = caption_data[l][0].sentence score = caption_data[l][0].score caption = vocabulary.get_sentence(word_idxs) - results.append({'image_id': eval_data.image_ids[idx], + results.append({'image_id': eval_data.image_ids[idx].item(), 'caption': caption}) idx += 1 @@ -97,7 +102,7 @@ def eval(self, sess, eval_gt_coco, eval_data, vocabulary): plt.savefig(os.path.join(config.eval_result_dir, image_name+'_result.jpg')) - fp = open(config.eval_result_file, 'wb') + fp = open(config.eval_result_file, 'w') json.dump(results, fp) fp.close() @@ -259,7 +264,7 @@ def load(self, sess, model_file=None): str(global_step)+".npy") print("Loading the model from %s..." %save_path) - data_dict = np.load(save_path).item() + data_dict = np.load(save_path, encoding='latin1').item() count = 0 for v in tqdm(tf.global_variables()): if v.name in data_dict.keys(): @@ -270,11 +275,14 @@ def load(self, sess, model_file=None): def load_cnn(self, session, data_path, ignore_missing=True): """ Load a pretrained CNN model. """ print("Loading the CNN from %s..."
%data_path) - data_dict = np.load(data_path).item() + # import pdb; pdb.set_trace() + import os; + data_path = data_path.strip() + data_dict = np.load(os.getcwd() + '/' + data_path, encoding='latin1').item() count = 0 for op_name in tqdm(data_dict): with tf.variable_scope(op_name, reuse = True): - for param_name, data in data_dict[op_name].iteritems(): + for param_name, data in data_dict[op_name].items(): try: var = tf.get_variable(param_name) session.run(var.assign(data)) diff --git a/dataset.py b/dataset.py index 5f656a0..300e10a 100644 --- a/dataset.py +++ b/dataset.py @@ -122,7 +122,7 @@ def prepare_train_data(config): data = {'word_idxs': word_idxs, 'masks': masks} np.save(config.temp_data_file, data) else: - data = np.load(config.temp_data_file).item() + data = np.load(config.temp_data_file, encoding='latin1').item() word_idxs = data['word_idxs'] masks = data['masks'] print("Captions processed.") diff --git a/utils/coco/pycocoevalcap/.gitignore b/utils/coco/pycocoevalcap/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/utils/coco/pycocoevalcap/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/utils/coco/pycocoevalcap/README.md b/utils/coco/pycocoevalcap/README.md new file mode 100644 index 0000000..7081c81 --- /dev/null +++ b/utils/coco/pycocoevalcap/README.md @@ -0,0 +1,44 @@ +Microsoft COCO Caption Evaluation +=================== + +Evaluation codes for MS COCO caption generation. + +## Description ## +This repository provides Python 3 support for the caption evaluation metrics used for the MS COCO dataset. + +The code is derived from the original repository that supports Python 2.7: https://github.com/tylin/coco-caption. +Caption evaluation depends on the COCO API that natively supports Python 3 (see Requirements). + +## Requirements ## +- Java 1.8.0 +- Python 3 (tested on Python 3.6) +- pycocotools (COCO Python API): https://github.com/cocodataset/cocoapi + +## Files ## +./ +- eval.py: The file includes the COCOEvalCap class that can be used to evaluate results on COCO. +- tokenizer: Python wrapper of Stanford CoreNLP PTBTokenizer +- bleu: Bleu evaluation codes +- meteor: Meteor evaluation codes +- rouge: Rouge-L evaluation codes +- cider: CIDEr evaluation codes + +## References ## + +- [Microsoft COCO Captions: Data Collection and Evaluation Server](http://arxiv.org/abs/1504.00325) +- PTBTokenizer: We use the [Stanford Tokenizer](http://nlp.stanford.edu/software/tokenizer.shtml) which is included in [Stanford CoreNLP 3.4.1](http://nlp.stanford.edu/software/corenlp.shtml). +- BLEU: [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf) +- Meteor: [Project page](http://www.cs.cmu.edu/~alavie/METEOR/) with related publications. We use the latest version (1.5) of the [Code](https://github.com/mjdenkowski/meteor). Changes have been made to the source code to properly aggregate the statistics for the entire corpus.
+- Rouge-L: [ROUGE: A Package for Automatic Evaluation of Summaries](http://anthology.aclweb.org/W/W04/W04-1013.pdf) +- CIDEr: [CIDEr: Consensus-based Image Description Evaluation](http://arxiv.org/pdf/1411.5726.pdf) + +## Developers ## +- Xinlei Chen (CMU) +- Hao Fang (University of Washington) +- Tsung-Yi Lin (Cornell) +- Ramakrishna Vedantam (Virginia Tech) + +## Acknowledgement ## +- David Chiang (University of Notre Dame) +- Michael Denkowski (CMU) +- Alexander Rush (Harvard University) diff --git a/utils/coco/pycocoevalcap/bleu/bleu.py b/utils/coco/pycocoevalcap/bleu/bleu.py index b0da5dd..bdebbb5 100755 --- a/utils/coco/pycocoevalcap/bleu/bleu.py +++ b/utils/coco/pycocoevalcap/bleu/bleu.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# +# # File Name : bleu.py # # Description : Wrapper for BLEU scorer. @@ -8,7 +8,7 @@ # Last Modified : Thu 19 Mar 2015 09:13:28 PM PDT # Authors : Hao Fang and Tsung-Yi Lin -from bleu_scorer import BleuScorer +from .bleu_scorer import BleuScorer class Bleu: diff --git a/utils/coco/pycocoevalcap/bleu/bleu_scorer.py b/utils/coco/pycocoevalcap/bleu/bleu_scorer.py index 3685e05..d240336 100755 --- a/utils/coco/pycocoevalcap/bleu/bleu_scorer.py +++ b/utils/coco/pycocoevalcap/bleu/bleu_scorer.py @@ -7,7 +7,7 @@ # reserved. Do not redistribute without permission from the # author. Not for commercial use. -# Modified by: +# Modified by: # Hao Fang # Tsung-Yi Lin @@ -26,8 +26,8 @@ def precook(s, n=4, out=False): can take string arguments as well.""" words = s.split() counts = defaultdict(int) - for k in xrange(1,n+1): - for i in xrange(len(words)-k+1): + for k in range(1,n+1): + for i in range(len(words)-k+1): ngram = tuple(words[i:i+k]) counts[ngram] += 1 return (len(words), counts) @@ -42,7 +42,7 @@ def cook_refs(refs, eff=None, n=4): ## lhuang: oracle will call with "average" for ref in refs: rl, counts = precook(ref, n) reflen.append(rl) - for (ngram,count) in counts.iteritems(): + for (ngram,count) in counts.items(): maxcounts[ngram] = max(maxcounts.get(ngram,0), count) # Calculate effective reference sentence length. @@ -52,21 +52,22 @@ def cook_refs(refs, eff=None, n=4): ## lhuang: oracle will call with "average" reflen = float(sum(reflen))/len(reflen) ## lhuang: N.B.: leave reflen computaiton to the very end!! - + ## lhuang: N.B.: in case of "closest", keep a list of reflens!! (bad design) return (reflen, maxcounts) -def cook_test(test, (reflen, refmaxcounts), eff=None, n=4): +def cook_test(test, refs, eff=None, n=4): '''Takes a test sentence and returns an object that encapsulates everything that BLEU needs to know about it.''' + reflen, refmaxcounts = refs testlen, counts = precook(test, n, True) result = {} # Calculate effective reference sentence length.
- + if eff == "closest": result["reflen"] = min((abs(l-testlen), l) for l in reflen)[1] else: ## i.e., "average" or "shortest" or None @@ -74,10 +75,10 @@ def cook_test(test, (reflen, refmaxcounts), eff=None, n=4): result["testlen"] = testlen - result["guess"] = [max(0,testlen-k+1) for k in xrange(1,n+1)] + result["guess"] = [max(0,testlen-k+1) for k in range(1,n+1)] result['correct'] = [0]*n - for (ngram, count) in counts.iteritems(): + for (ngram, count) in counts.items(): result["correct"][len(ngram)-1] += min(refmaxcounts.get(ngram,0), count) return result @@ -108,7 +109,7 @@ def __init__(self, test=None, refs=None, n=4, special_reflen=None): def cook_append(self, test, refs): '''called by constructor and __iadd__ to avoid creating new instances.''' - + if refs is not None: self.crefs.append(cook_refs(refs)) if test is not None: @@ -136,7 +137,7 @@ def reflen(self, option=None): def testlen(self, option=None): self.compute_score(option=option) - return self._testlen + return self._testlen def retest(self, new_test): if type(new_test) is str: @@ -151,7 +152,7 @@ def retest(self, new_test): def rescore(self, new_test): ''' replace test(s) with new test(s), and returns the new score.''' - + return self.retest(new_test).compute_score() def size(self): @@ -170,7 +171,7 @@ def __iadd__(self, other): self.crefs.extend(other.crefs) self._score = None ## need to recompute - return self + return self def compatible(self, other): return isinstance(other, BleuScorer) and self.n == other.n @@ -179,7 +180,7 @@ def single_reflen(self, option="average"): return self._single_reflen(self.crefs[0][0], option) def _single_reflen(self, reflens, option=None, testlen=None): - + if option == "shortest": reflen = min(reflens) elif option == "average": @@ -194,7 +195,7 @@ def _single_reflen(self, reflens, option=None, testlen=None): def recompute_score(self, option=None, verbose=0): self._score = None return self.compute_score(option, verbose) - + def compute_score(self, option=None, verbose=0): n = self.n small = 1e-9 @@ -212,7 +213,7 @@ def compute_score(self, option=None, verbose=0): totalcomps = {'testlen':0, 'reflen':0, 'guess':[0]*n, 'correct':[0]*n} # for each sentence - for comps in self.ctest: + for comps in self.ctest: testlen = comps['testlen'] self._testlen += testlen @@ -222,42 +223,42 @@ def compute_score(self, option=None, verbose=0): reflen = self.special_reflen self._reflen += reflen - + for key in ['guess','correct']: - for k in xrange(n): + for k in range(n): totalcomps[key][k] += comps[key][k] # append per image bleu score bleu = 1. - for k in xrange(n): + for k in range(n): bleu *= (float(comps['correct'][k]) + tiny) \ - /(float(comps['guess'][k]) + small) + /(float(comps['guess'][k]) + small) bleu_list[k].append(bleu ** (1./(k+1))) ratio = (testlen + tiny) / (reflen + small) ## N.B.: avoid zero division if ratio < 1: - for k in xrange(n): + for k in range(n): bleu_list[k][-1] *= math.exp(1 - 1/ratio) if verbose > 1: - print comps, reflen + print(comps, reflen) totalcomps['reflen'] = self._reflen totalcomps['testlen'] = self._testlen bleus = [] bleu = 1. 
- for k in xrange(n): + for k in range(n): bleu *= float(totalcomps['correct'][k] + tiny) \ / (totalcomps['guess'][k] + small) bleus.append(bleu ** (1./(k+1))) ratio = (self._testlen + tiny) / (self._reflen + small) ## N.B.: avoid zero division if ratio < 1: - for k in xrange(n): + for k in range(n): bleus[k] *= math.exp(1 - 1/ratio) if verbose > 0: - print totalcomps - print "ratio:", ratio + print(totalcomps) + print("ratio:", ratio) self._score = bleus return self._score, bleu_list diff --git a/utils/coco/pycocoevalcap/cider/cider.py b/utils/coco/pycocoevalcap/cider/cider.py index d0b99ee..18f6cc4 100755 --- a/utils/coco/pycocoevalcap/cider/cider.py +++ b/utils/coco/pycocoevalcap/cider/cider.py @@ -1,18 +1,18 @@ # Filename: cider.py # -# Description: Describes the class to compute the CIDEr (Consensus-Based Image Description Evaluation) Metric +# Description: Describes the class to compute the CIDEr (Consensus-Based Image Description Evaluation) Metric # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726) # # Creation Date: Sun Feb 8 14:16:54 2015 # # Authors: Ramakrishna Vedantam and Tsung-Yi Lin -from cider_scorer import CiderScorer +from .cider_scorer import CiderScorer import pdb class Cider: """ - Main Class to compute the CIDEr metric + Main Class to compute the CIDEr metric """ def __init__(self, test=None, refs=None, n=4, sigma=6.0): @@ -26,7 +26,7 @@ def compute_score(self, gts, res): Main function to compute CIDEr score :param hypo_for_image (dict) : dictionary with key and value ref_for_image (dict) : dictionary with key and value - :return: cider (float) : computed CIDEr score for the corpus + :return: cider (float) : computed CIDEr score for the corpus """ assert(gts.keys() == res.keys()) @@ -51,4 +51,4 @@ def compute_score(self, gts, res): return score, scores def method(self): - return "CIDEr" \ No newline at end of file + return "CIDEr" diff --git a/utils/coco/pycocoevalcap/cider/cider_scorer.py b/utils/coco/pycocoevalcap/cider/cider_scorer.py index a73405e..94d6689 100755 --- a/utils/coco/pycocoevalcap/cider/cider_scorer.py +++ b/utils/coco/pycocoevalcap/cider/cider_scorer.py @@ -19,8 +19,8 @@ def precook(s, n=4, out=False): """ words = s.split() counts = defaultdict(int) - for k in xrange(1,n+1): - for i in xrange(len(words)-k+1): + for k in range(1,n+1): + for i in range(len(words)-k+1): ngram = tuple(words[i:i+k]) counts[ngram] += 1 return counts @@ -99,7 +99,7 @@ def compute_doc_freq(self): ''' for refs in self.crefs: # refs, k ref captions of one image - for ngram in set([ngram for ref in refs for (ngram,count) in ref.iteritems()]): + for ngram in set([ngram for ref in refs for (ngram,count) in ref.items()]): self.document_frequency[ngram] += 1 # maxcounts[ngram] = max(maxcounts.get(ngram,0), count) @@ -115,7 +115,7 @@ def counts2vec(cnts): vec = [defaultdict(float) for _ in range(self.n)] length = 0 norm = [0.0 for _ in range(self.n)] - for (ngram,term_freq) in cnts.iteritems(): + for (ngram,term_freq) in cnts.items(): # give word count 1 if it doesn't appear in reference corpus df = np.log(max(1.0, self.document_frequency[ngram])) # ngram index @@ -146,7 +146,7 @@ def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref): val = np.array([0.0 for _ in range(self.n)]) for n in range(self.n): # ngram - for (ngram,count) in vec_hyp[n].iteritems(): + for (ngram,count) in vec_hyp[n].items(): # vrama91 : added clipping val[n] += min(vec_hyp[n][ngram], vec_ref[n][ngram]) * vec_ref[n][ngram] @@ -189,4 +189,4 @@ def compute_score(self, 
option=None, verbose=0): score = self.compute_cider() # debug # print score - return np.mean(np.array(score)), np.array(score) \ No newline at end of file + return np.mean(np.array(score)), np.array(score) diff --git a/utils/coco/pycocoevalcap/eval.py b/utils/coco/pycocoevalcap/eval.py index bfbac93..d129222 100755 --- a/utils/coco/pycocoevalcap/eval.py +++ b/utils/coco/pycocoevalcap/eval.py @@ -1,9 +1,9 @@ __author__ = 'tylin' -from tokenizer.ptbtokenizer import PTBTokenizer -from bleu.bleu import Bleu -from meteor.meteor import Meteor -from rouge.rouge import Rouge -from cider.cider import Cider +from .tokenizer.ptbtokenizer import PTBTokenizer +from .bleu.bleu import Bleu +from .meteor.meteor import Meteor +from .rouge.rouge import Rouge +from .cider.cider import Cider class COCOEvalCap: def __init__(self, coco, cocoRes): @@ -26,7 +26,7 @@ def evaluate(self): # ================================================= # Set up scorers # ================================================= - print 'tokenization...' + print('tokenization...') tokenizer = PTBTokenizer() gts = tokenizer.tokenize(gts) res = tokenizer.tokenize(res) @@ -34,7 +34,7 @@ def evaluate(self): # ================================================= # Set up scorers # ================================================= - print 'setting up scorers...' + print('setting up scorers...') scorers = [ (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), (Meteor(),"METEOR"), @@ -46,17 +46,17 @@ def evaluate(self): # Compute scores # ================================================= for scorer, method in scorers: - print 'computing %s score...'%(scorer.method()) + print('computing %s score...'%(scorer.method())) score, scores = scorer.compute_score(gts, res) if type(method) == list: for sc, scs, m in zip(score, scores, method): self.setEval(sc, m) self.setImgToEvalImgs(scs, gts.keys(), m) - print "%s: %0.3f"%(m, sc) + print("%s: %0.3f"%(m, sc)) else: self.setEval(score, method) self.setImgToEvalImgs(scores, gts.keys(), method) - print "%s: %0.3f"%(method, score) + print("%s: %0.3f"%(method, score)) self.setEvalImgs() def setEval(self, score, method): @@ -70,4 +70,4 @@ def setImgToEvalImgs(self, scores, imgIds, method): self.imgToEval[imgId][method] = score def setEvalImgs(self): - self.evalImgs = [eval for imgId, eval in self.imgToEval.items()] \ No newline at end of file + self.evalImgs = [eval for imgId, eval in self.imgToEval.items()] diff --git a/utils/coco/pycocoevalcap/license.txt b/utils/coco/pycocoevalcap/license.txt new file mode 100644 index 0000000..7428d36 --- /dev/null +++ b/utils/coco/pycocoevalcap/license.txt @@ -0,0 +1,26 @@ +Copyright (c) 2015, Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD Project. diff --git a/utils/coco/pycocoevalcap/meteor/meteor.py b/utils/coco/pycocoevalcap/meteor/meteor.py index 9a61606..61916f4 100755 --- a/utils/coco/pycocoevalcap/meteor/meteor.py +++ b/utils/coco/pycocoevalcap/meteor/meteor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Python wrapper for METEOR implementation, by Xinlei Chen -# Acknowledge Michael Denkowski for the generous discussion and help +# Acknowledge Michael Denkowski for the generous discussion and help import os import sys @@ -37,7 +37,8 @@ def compute_score(self, gts, res): stat = self._stat(res[i][0], gts[i]) eval_line += ' ||| {}'.format(stat) - self.meteor_p.stdin.write('{}\n'.format(eval_line)) + self.meteor_p.stdin.write('{}\n'.format(eval_line).encode()) + self.meteor_p.stdin.flush() for i in range(0,len(imgIds)): scores.append(float(self.meteor_p.stdout.readline().strip())) score = float(self.meteor_p.stdout.readline().strip()) @@ -52,8 +53,9 @@ def _stat(self, hypothesis_str, reference_list): # SCORE ||| reference 1 words ||| reference n words ||| hypothesis words hypothesis_str = hypothesis_str.replace('|||','').replace(' ',' ') score_line = ' ||| '.join(('SCORE', ' ||| '.join(reference_list), hypothesis_str)) - self.meteor_p.stdin.write('{}\n'.format(score_line)) - return self.meteor_p.stdout.readline().strip() + self.meteor_p.stdin.write('{}\n'.format(score_line).encode()) + self.meteor_p.stdin.flush() + return self.meteor_p.stdout.readline().decode().strip() def _score(self, hypothesis_str, reference_list): self.lock.acquire() @@ -63,7 +65,7 @@ def _score(self, hypothesis_str, reference_list): self.meteor_p.stdin.write('{}\n'.format(score_line)) stats = self.meteor_p.stdout.readline().strip() eval_line = 'EVAL ||| {}'.format(stats) - # EVAL ||| stats + # EVAL ||| stats self.meteor_p.stdin.write('{}\n'.format(eval_line)) score = float(self.meteor_p.stdout.readline().strip()) # bug fix: there are two values returned by the jar file, one average, and one all, so do it twice @@ -71,8 +73,8 @@ def _score(self, hypothesis_str, reference_list): score = float(self.meteor_p.stdout.readline().strip()) self.lock.release() return score - - def __exit__(self): + + def __del__(self): self.lock.acquire() self.meteor_p.stdin.close() self.meteor_p.kill() diff --git a/utils/coco/pycocoevalcap/rouge/rouge.py b/utils/coco/pycocoevalcap/rouge/rouge.py index 3a10f5a..842f397 100755 --- a/utils/coco/pycocoevalcap/rouge/rouge.py +++ b/utils/coco/pycocoevalcap/rouge/rouge.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# +# # File Name : rouge.py # # Description : Computes ROUGE-L metric as described by Lin and Hovey (2004) @@ -49,14 +49,14 @@ def calc_score(self, candidate, refs): :param refs: list of str : COCO reference sentences for the particular image to be evaluated :returns score: int (ROUGE-L score for the candidate 
evaluated against references) """ - assert(len(candidate)==1) - assert(len(refs)>0) + assert(len(candidate)==1) + assert(len(refs)>0) prec = [] rec = [] # split into tokens token_c = candidate[0].split(" ") - + for reference in refs: # split into tokens token_r = reference.split(" ") @@ -77,8 +77,8 @@ def calc_score(self, candidate, refs): def compute_score(self, gts, res): """ Computes Rouge-L score given a set of reference and candidate sentences for the dataset - Invoked by evaluate_captions.py - :param hypo_for_image: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values + Invoked by evaluate_captions.py + :param hypo_for_image: dict : candidate / test sentences with "image name" key and "tokenized sentences" as values :param ref_for_image: dict : reference MS-COCO sentences with "image name" key and "tokenized sentences" as values :returns: average_score: float (mean ROUGE-L score computed by averaging scores for all the images) """ diff --git a/utils/coco/pycocoevalcap/tokenizer/ptbtokenizer.py b/utils/coco/pycocoevalcap/tokenizer/ptbtokenizer.py index aa790ba..6b7f370 100755 --- a/utils/coco/pycocoevalcap/tokenizer/ptbtokenizer.py +++ b/utils/coco/pycocoevalcap/tokenizer/ptbtokenizer.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# +# # File Name : ptbtokenizer.py # # Description : Do the PTB Tokenization and remove punctuations. @@ -19,7 +19,7 @@ # punctuations to be removed from the sentences PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-", \ - ".", "?", "!", ",", ":", "-", "--", "...", ";"] + ".", "?", "!", ",", ":", "-", "--", "...", ";"] class PTBTokenizer: """Python wrapper of Stanford PTBTokenizer""" @@ -41,7 +41,7 @@ def tokenize(self, captions_for_image): # ====================================================== path_to_jar_dirname=os.path.dirname(os.path.abspath(__file__)) tmp_file = tempfile.NamedTemporaryFile(delete=False, dir=path_to_jar_dirname) - tmp_file.write(sentences) + tmp_file.write(sentences.encode()) tmp_file.close() # ====================================================== @@ -51,6 +51,7 @@ def tokenize(self, captions_for_image): p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dirname, \ stdout=subprocess.PIPE) token_lines = p_tokenizer.communicate(input=sentences.rstrip())[0] + token_lines = token_lines.decode() lines = token_lines.split('\n') # remove temp file os.remove(tmp_file.name)
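Aside from mechanical syntax changes (relative imports, `range`, `dict.items()`, `print()`), the port reduces to two Python 2 → 3 issues that recur across base_model.py, dataset.py, meteor.py, and ptbtokenizer.py: unpickling `.npy` files written under Python 2, and exchanging text with Java subprocesses whose pipes now carry bytes. The sketch below is not part of the diff; it is a minimal, self-contained illustration of those two patterns. The names `load_py2_pickle` and `echo_through_subprocess` are placeholders, and `cat` is used only as a Unix stand-in for the METEOR/PTBTokenizer jars.

```python
# Minimal sketch (assumptions noted inline) of the two Python 2 -> 3 patterns
# this port relies on.
import subprocess

import numpy as np


def load_py2_pickle(path):
    """Load a .npy file holding a dict pickled under Python 2 (e.g. pretrained CNN weights)."""
    # encoding='latin1' lets Python 3 decode byte strings written by Python 2;
    # allow_pickle=True is required for object arrays on newer NumPy releases.
    return np.load(path, encoding='latin1', allow_pickle=True).item()


def echo_through_subprocess(line, cmd=('cat',)):
    """Send one text line to a subprocess and read one line back.

    In Python 3 the pipes carry bytes, so text is encoded on write and decoded
    on read -- the same pattern used around the METEOR and PTBTokenizer jars.
    """
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    proc.stdin.write('{}\n'.format(line).encode())
    proc.stdin.flush()                     # make sure the child actually sees the line
    reply = proc.stdout.readline().decode().strip()
    proc.stdin.close()
    proc.wait()
    return reply


if __name__ == '__main__':
    print(echo_through_subprocess('hello'))  # prints: hello
```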