diff --git a/README.md b/README.md index 760c5a9..c73318d 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,11 @@ The HelloQA pipeline works out of the box, but you'll need to download the Gutenberg index input data separately (it's a big file and doesn't make sense to keep it in the repository, bad practice and all). +## Dataset Preparation + +!!! Note: the following steps are obsolete; the dataset is now included in the repository. +See also https://github.com/oaqa/helloqa/wiki/DSO-Project !!! + 1. Download the Gutenberg index [here](https://github.com/downloads/oaqa/helloqa/guten.tar.gz "guten.tar.gz"). @@ -16,6 +21,8 @@ keep it in the repository, bad practice and all). project such that your helloqa/data/guten directory contains bin, conf, data, lib, etc. +## Running the Pipeline (Eclipse) + You're now ready to run the pipeline. 1. Open project in Eclipse. @@ -25,3 +32,12 @@ You're now ready to run the pipeline. 3. Expand "launches" and right-click on test.launch > Run As > test. This launch should now be the default behavior whenever you click the "Run" button in the toolbar while you're in the helloqa project. + +## Running the Pipeline (command line) + +As an alternative, you may run the project outside of Eclipse. Simply +issue the command: + + mvn exec:exec -Dexec.executable=java -Dexec.args="-Djava.library.path=lib/ -classpath %classpath edu.cmu.lti.oaqa.ecd.driver.ECDDriver phases.err-analysis-IE-dsoqa" + +You may vary the last argument; see the src/main/resources/phases/ directory for the available options. 
diff --git a/data/oaqa-eval.db3 b/data/oaqa-eval.db3 index d553432..70d8c2c 100644 Binary files a/data/oaqa-eval.db3 and b/data/oaqa-eval.db3 differ diff --git a/launches/test.launch b/launches/test.launch index dcced8f..cb09e26 100644 --- a/launches/test.launch +++ b/launches/test.launch @@ -10,7 +10,7 @@ - + diff --git a/lib/libindri_jni.so b/lib/libindri_jni.so old mode 100644 new mode 100755 diff --git a/pom.xml b/pom.xml index 3384aed..29a0024 100644 --- a/pom.xml +++ b/pom.xml @@ -45,7 +45,7 @@ oaqa-thirdparty - http://mu.lti.cs.cmu.edu:8081/nexus/content/repositories/oaqa-thirdparty + http://mu.lti.cs.cmu.edu:8081/nexus/content/repositories/thirdparty @@ -234,6 +234,19 @@ + + maven-assembly-plugin + + + + edu.cmu.lti.oaqa.ecd.driver.ECDDriver + + + + jar-with-dependencies + + + diff --git a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/ie/ErrAnalysisInformationExtractor.java b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/ie/ErrAnalysisInformationExtractor.java index 7f89c41..1ea48ca 100644 --- a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/ie/ErrAnalysisInformationExtractor.java +++ b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/ie/ErrAnalysisInformationExtractor.java @@ -41,7 +41,7 @@ public void initialize(UimaContext aContext) @Override public void initialize() { - String filePathName = "/home/ruil/workspace/git/helloqa/src/main/resources/gs/dso-extension-answerkey.txt"; + String filePathName = "src/main/resources/gs/dso-extension-answerkey.txt"; List lines = FileUtil.readFile(filePathName); for (int i = 0; i < lines.size(); i++) { String psggs = lines.get(i); diff --git a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/ErrAnalysisPassageRetrieval.java b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/ErrAnalysisPassageRetrieval.java index 661d454..dc6c2c0 100644 --- a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/ErrAnalysisPassageRetrieval.java +++ 
b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/ErrAnalysisPassageRetrieval.java @@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.io.File; import org.apache.uima.UimaContext; import org.apache.uima.resource.ResourceInitializationException; @@ -37,21 +38,23 @@ public void initialize(UimaContext aContext) @Override public void initialize() { - localWikiSearcher = new LocalCorpusSearcher(); - localDSOSearcher = new LocalCorpusSearcher(); - - DSOLocalRetrievalCache dso_cache = new DSOLocalRetrievalCache(); - WikiLocalRetrievalCache wiki_cache = new WikiLocalRetrievalCache(); - - searchers.add(localWikiSearcher); - searchers.add(localDSOSearcher); + String wikiIndex = "/home/ruil/Downloads/Indexes/wikipedia"; + if (new File(wikiIndex).isFile()) { + localWikiSearcher = new LocalCorpusSearcher(); + WikiLocalRetrievalCache wiki_cache = new WikiLocalRetrievalCache(); + localWikiSearcher.initialize(wiki_cache.getInstance(), wikiIndex); + searchers.add(localWikiSearcher); + } - localWikiSearcher.initialize(wiki_cache.getInstance(), - "/home/ruil/Downloads/Indexes/wikipedia"); - localDSOSearcher.initialize(dso_cache.getInstance(), - "xmirepo/dso/index"); + String DSOIndex = "xmirepo/dso/index"; + if (new File(DSOIndex).isFile()) { + localDSOSearcher = new LocalCorpusSearcher(); + DSOLocalRetrievalCache dso_cache = new DSOLocalRetrievalCache(); + localDSOSearcher.initialize(dso_cache.getInstance(), DSOIndex); + searchers.add(localDSOSearcher); + } - String filePathName = "/home/ruil/workspace/git/helloqa/src/main/resources/gs/dso-extension-psg.txt"; + String filePathName = "src/main/resources/gs/dso-extension-psg.txt"; List lines = FileUtil.readFile(filePathName); for (int i = 0; i < lines.size(); i++) { String psggs = lines.get(i); @@ -73,13 +76,17 @@ public List retrieveDocuments(String qid, String answerType) { List mergedresults = new ArrayList(); - List localWikipassages = localWikiSearcher - 
.retrieveDocuments(keyterms, keyphrases, question, answerType); - mergedresults.addAll(localWikipassages); + if (localWikiSearcher != null) { + List localWikipassages = localWikiSearcher + .retrieveDocuments(keyterms, keyphrases, question, answerType); + mergedresults.addAll(localWikipassages); + } - List localDSOpassages = localDSOSearcher - .retrieveDocuments(keyterms, keyphrases, question, answerType); - mergedresults.addAll(localDSOpassages); + if (localDSOSearcher != null) { + List localDSOpassages = localDSOSearcher + .retrieveDocuments(keyterms, keyphrases, question, answerType); + mergedresults.addAll(localDSOpassages); + } RetrievalResult gspassage = readGsPsgs(qid); mergedresults.add(gspassage); diff --git a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/PassageRetrieval.java b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/PassageRetrieval.java index 0360164..3ec7dc8 100644 --- a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/PassageRetrieval.java +++ b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/phase/passage/PassageRetrieval.java @@ -2,6 +2,7 @@ import java.util.ArrayList; import java.util.List; +import java.io.File; import org.apache.uima.UimaContext; import org.apache.uima.resource.ResourceInitializationException; @@ -35,20 +36,27 @@ public void initialize(UimaContext aContext) @Override public void initialize() { - localWikiSearcher = new LocalCorpusSearcher(); - localDSOSearcher=new LocalCorpusSearcher(); //webSearcher = new WebDocumentSearcher(); //rdfSearcher = new RDFSearcher(); - DSOLocalRetrievalCache dso_cache=new DSOLocalRetrievalCache(); - WikiLocalRetrievalCache wiki_cache=new WikiLocalRetrievalCache(); - - searchers.add(localWikiSearcher); - searchers.add(localDSOSearcher); //searchers.add(rdfSearcher); - localWikiSearcher.initialize(wiki_cache.getInstance(), "/home/ruil/Downloads/Indexes/wikipedia"); - localDSOSearcher.initialize(dso_cache.getInstance(), "xmirepo/dso/index"); + String wikiIndex = 
"/home/ruil/Downloads/Indexes/wikipedia"; + if (new File(wikiIndex).isFile()) { + localWikiSearcher = new LocalCorpusSearcher(); + WikiLocalRetrievalCache wiki_cache=new WikiLocalRetrievalCache(); + localWikiSearcher.initialize(wiki_cache.getInstance(), wikiIndex); + searchers.add(localWikiSearcher); + } + + String DSOIndex = "xmirepo/dso/index"; + if (new File(DSOIndex).isFile()) { + localDSOSearcher=new LocalCorpusSearcher(); + DSOLocalRetrievalCache dso_cache=new DSOLocalRetrievalCache(); + localDSOSearcher.initialize(dso_cache.getInstance(), DSOIndex); + searchers.add(localDSOSearcher); + } + // try { // rdfSearcher.initialize(null, ""); // } catch (JSONException e) { @@ -64,13 +72,17 @@ public List retrieveDocuments(String qid, List keyterms // keyterms, keyphrases, question, answerType); // mergedresults.addAll(RDFpassages); - List localWikipassages = localWikiSearcher.retrieveDocuments( - keyterms, keyphrases, question, answerType); - mergedresults.addAll(localWikipassages); + if (localWikiSearcher != null) { + List localWikipassages = localWikiSearcher.retrieveDocuments( + keyterms, keyphrases, question, answerType); + mergedresults.addAll(localWikipassages); + } - List localDSOpassages = localDSOSearcher.retrieveDocuments( - keyterms, keyphrases, question, answerType); - mergedresults.addAll(localDSOpassages); + if (localDSOSearcher != null) { + List localDSOpassages = localDSOSearcher.retrieveDocuments( + keyterms, keyphrases, question, answerType); + mergedresults.addAll(localDSOpassages); + } // List webpassages = webSearcher.retrieveDocuments( // keyterms, keyphrases, question); diff --git a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/util/GSFileConverter.java b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/util/GSFileConverter.java index 53c2d17..c6ce324 100644 --- a/src/main/java/edu/cmu/lti/oaqa/openqa/dso/util/GSFileConverter.java +++ b/src/main/java/edu/cmu/lti/oaqa/openqa/dso/util/GSFileConverter.java @@ -75,7 +75,7 @@ static void TREC2CSE_QUestion(){ 
static void Question(){ HashMap questionMap=new HashMap(); - ArrayList lines=FileUtil.readFile("/home/ruil/workspace/git/helloqa/src/main/resources/gs/dso-extension-psg.txt"); + ArrayList lines=FileUtil.readFile("src/main/resources/gs/dso-extension-psg.txt"); int index=1; for(String line:lines){ diff --git a/src/main/resources/META-INF/org.uimafit/types.txt b/src/main/resources/META-INF/org.uimafit/types.txt index c0c5a93..3350d0a 100644 --- a/src/main/resources/META-INF/org.uimafit/types.txt +++ b/src/main/resources/META-INF/org.uimafit/types.txt @@ -1 +1,2 @@ classpath*:edu/cmu/lti/oaqa/OAQATypes.xml +classpath*:edu/cmu/lti/oaqa/frameworkTypesDescriptor.xml diff --git a/src/main/resources/phases/dsoqa.yaml b/src/main/resources/phases/dsoqa.yaml index 66aea83..f976269 100644 --- a/src/main/resources/phases/dsoqa.yaml +++ b/src/main/resources/phases/dsoqa.yaml @@ -32,7 +32,7 @@ pipeline: - inherit: jdbc.sqlite.cse.phase name: passage-retrieval options: | - - inherit: phases.PassageRetrieval + - inherit: phases.pr.PassageRetrieval - inherit: jdbc.sqlite.cse.phase name: information-extractor