MALLET topic model example cannot run (NoClassDefFoundError) -
I want to use MALLET from Java code (instead of using the command line), so I included the MALLET jar in my project and copied the example code from http://mallet.cs.umass.edu/topics-devel.php. However, when I run the code, I get this error:
Exception in thread "main" java.lang.NoClassDefFoundError: gnu/trove/TObjectIntHashMap
    at cc.mallet.types.Alphabet.<init>(Alphabet.java:51)
    at cc.mallet.types.Alphabet.<init>(Alphabet.java:70)
    at cc.mallet.pipe.TokenSequence2FeatureSequence.<init>(TokenSequence2FeatureSequence.java:35)
    at mallet.TopicModel.main(TopicModel.java:25)
Caused by: java.lang.ClassNotFoundException: gnu.trove.TObjectIntHashMap
    at java.net.URLClassLoader$1.run(Unknown Source)
    at java.net.URLClassLoader$1.run(Unknown Source)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
    at java.lang.ClassLoader.loadClass(Unknown Source)
    ... 4 more
I am not sure what causes this error. Can anyone help?
package mallet;

import cc.mallet.util.*;
import cc.mallet.types.*;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.topics.*;

import java.util.*;
import java.util.regex.*;
import java.io.*;

/**
 * Trains a MALLET LDA topic model on a one-document-per-line text file, prints
 * the topic assignments of the first document and the top words of every
 * topic, then infers a topic distribution for a small synthetic document.
 *
 * <p>Runtime note: constructing {@code TokenSequence2FeatureSequence} creates
 * an {@code Alphabet}, which loads {@code gnu.trove.TObjectIntHashMap}. The
 * jar that provides that class must therefore be on the runtime classpath in
 * addition to the MALLET jar itself; otherwise {@code main} fails with
 * {@code NoClassDefFoundError}.
 */
public class TopicModel {

    /**
     * Runs the complete example: import, training, reporting and inference.
     *
     * @param args ignored; the input file location is hard-coded below
     * @throws Exception if the input or stop-word file cannot be read, or if
     *         MALLET reports a failure during training or inference
     */
    public static void main(String[] args) throws Exception {

        String filePath = "d:/ap.txt";

        // Begin by importing documents from text to feature sequences
        ArrayList<Pipe> pipeList = new ArrayList<Pipe>();

        // Pipes: lowercase, tokenize, remove stopwords, map to features
        pipeList.add(new CharSequenceLowercase());
        // A token starts and ends with a letter (\p{L}) and may contain
        // letters or punctuation (\p{P}) in between.
        pipeList.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}")));
        pipeList.add(new TokenSequenceRemoveStopwords(new File("stoplists/en.txt"), "utf-8", false, false, false));
        pipeList.add(new TokenSequence2FeatureSequence());

        InstanceList instances = new InstanceList(new SerialPipes(pipeList));

        Reader fileReader = new InputStreamReader(new FileInputStream(new File(filePath)), "utf-8");
        // Each input line is "<name> <label> <data...>": two runs of
        // non-whitespace (\S*) separated by whitespace/commas, then the rest.
        instances.addThruPipe(new CsvIterator(fileReader,
                Pattern.compile("^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$"),
                3, 2, 1)); // data, label, name fields

        // Create a model with 100 topics, alpha_t = 0.01, beta_w = 0.01
        //  Note that the first parameter is passed as the sum over topics
        //  (1.0 / 100 topics = 0.01 each), while the second is the value
        //  for a single dimension of the prior.
        int numTopics = 100;
        ParallelTopicModel model = new ParallelTopicModel(numTopics, 1.0, 0.01);

        model.addInstances(instances);

        // Use two parallel samplers, which each look at one half of the
        //  corpus and combine statistics after every iteration.
        model.setNumThreads(2);

        // Run the model for 50 iterations and stop (this is for testing only,
        //  for real applications, use 1000 to 2000 iterations)
        model.setNumIterations(50);
        model.estimate();

        // Show the words and topics in the first instance

        // The data alphabet maps word IDs to strings
        Alphabet dataAlphabet = instances.getDataAlphabet();

        FeatureSequence tokens = (FeatureSequence) model.getData().get(0).instance.getData();
        LabelSequence topics = model.getData().get(0).topicSequence;

        Formatter out = new Formatter(new StringBuilder(), Locale.US);
        for (int position = 0; position < tokens.getLength(); position++) {
            out.format("%s-%d ",
                    dataAlphabet.lookupObject(tokens.getIndexAtPosition(position)),
                    topics.getIndexAtPosition(position));
        }
        System.out.println(out);

        // Estimate the topic distribution of the first instance,
        //  given the current Gibbs state.
        double[] topicDistribution = model.getTopicProbabilities(0);

        // Get an array of sorted sets of word ID/count pairs
        ArrayList<TreeSet<IDSorter>> topicSortedWords = model.getSortedWords();

        // Show top 5 words in topics with proportions for the first document
        for (int topic = 0; topic < numTopics; topic++) {
            Iterator<IDSorter> iterator = topicSortedWords.get(topic).iterator();

            out = new Formatter(new StringBuilder(), Locale.US);
            out.format("%d\t%.3f\t", topic, topicDistribution[topic]);
            int rank = 0;
            while (iterator.hasNext() && rank < 5) {
                IDSorter idCountPair = iterator.next();
                out.format("%s (%.0f) ",
                        dataAlphabet.lookupObject(idCountPair.getID()),
                        idCountPair.getWeight());
                rank++;
            }
            System.out.println(out);
        }

        // Create a new instance with high probability of topic 0
        StringBuilder topicZeroText = new StringBuilder();
        Iterator<IDSorter> iterator = topicSortedWords.get(0).iterator();

        int rank = 0;
        while (iterator.hasNext() && rank < 5) {
            IDSorter idCountPair = iterator.next();
            topicZeroText.append(dataAlphabet.lookupObject(idCountPair.getID()) + " ");
            rank++;
        }

        // Create a new instance named "test instance" with empty target and source fields.
        InstanceList testing = new InstanceList(instances.getPipe());
        testing.addThruPipe(new Instance(topicZeroText.toString(), null, "test instance", null));

        TopicInferencer inferencer = model.getInferencer();
        double[] testProbabilities = inferencer.getSampledDistribution(testing.get(0), 10, 1, 5);
        System.out.println("0\t" + testProbabilities[0]);
    }
}
I solved the problem. First, I tried importing Trove 3.1 in Eclipse, but that did not work. Then I noticed that the MALLET folder contains a "lib" folder, so I added the jar files from it to the Eclipse project. Bingo! It works.
Comments
Post a Comment