package in.cdacn.nlp.ilmt; import java.net.Socket; import java.net.ServerSocket; import java.net.BindException; import java.io.File; import java.io.FileWriter; import java.io.PrintWriter; import java.io.IOException; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.Base64; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import sanchay.corpus.ssf.SSFProperties; import sanchay.corpus.ssf.SSFSentence; import sanchay.corpus.ssf.SSFStory; import sanchay.corpus.ssf.features.FeatureAttribute; import sanchay.corpus.ssf.features.FeatureStructure; import sanchay.corpus.ssf.features.FeatureStructures; import sanchay.corpus.ssf.features.FeatureValue; import sanchay.corpus.ssf.features.impl.FSProperties; import sanchay.corpus.ssf.features.impl.FeatureStructuresImpl; import sanchay.corpus.ssf.impl.SSFSentenceImpl; import sanchay.corpus.ssf.impl.SSFStoryImpl; import sanchay.corpus.ssf.tree.SSFNode; import sanchay.corpus.ssf.tree.SSFPhrase; public class SSFReading { public void convertFile(String InputFile, String Output) throws FileNotFoundException, UnsupportedEncodingException, IOException { FSProperties fsp = new FSProperties(); SSFProperties ssfp = new SSFProperties(); SSFProperties cmlp = new SSFProperties(); SSFStory story = new SSFStoryImpl(); SSFSentence sentence = new SSFSentenceImpl(); try { fsp.read(GuessMorph.stHomedirPath + "/Sanchay/props/fs-mandatory-attribs.txt", GuessMorph.stHomedirPath + "/Sanchay/props/fs-props.txt", "UTF-8"); ssfp.read(GuessMorph.stHomedirPath + "/Sanchay/props/ssf-props.txt", "UTF-8"); cmlp.read(GuessMorph.stHomedirPath + "/Sanchay/props/cml-props.txt", "UTF-8"); FeatureStructuresImpl.setFSProperties(fsp); SSFNode.setSSFProperties(ssfp); SSFNode.setCMLProperties(cmlp); if (GuessMorph.isDaemon) { try { ServerSocket listener = new ServerSocket(GuessMorph.daemonPort); int clientNumber = 0; try { while (true) { new Guess(listener.accept(), clientNumber++, this).start(); } } finally { listener.close(); } } catch (BindException e) { //log.error(e); } } else { story.readFile(InputFile); readSentence(story); if (GuessMorph.isOutputFile) story.save(Output, "UTF-8"); else story.print(System.out); } } catch (Exception ex) { ex.printStackTrace(); } } public void readSentence(SSFStory story) throws Exception { int count = story.countSentences(); for (int i = 0; i < count; i++) { SSFSentence sent = story.getSentence(i); int ccount = sent.getRoot().getChildCount(); for (int j = 0; j < ccount; j++) { SSFNode node = sent.getRoot().getChild(j); String word = ""; SSFNode tempNode = node; for (int index = 0; index < node.countChildren(); index++) { SSFNode nodeChild = (SSFNode)node.getChildAt(index); word = nodeChild.getLexData(); if (word.equals("hEM")) { getRightHeM(node); } } if (node.getName().equals("NP")) { doCategoryMap(node); node = tempNode; if (dochunkContainPSP(node)) { doNounGuessForPSP(tempNode); } else { doNounGuess(tempNode); } } if ((node.getName().equals("VGNF")) || (node.getName().equals("VGF"))) { getCorrectFeatureForVGNFandVGF(node, node.getName()); } if (node.getFeatureStructures() != null) { String stChunkName = node.getName(); FeatureStructures fss = node.getFeatureStructures(); if (node.getChildCount() > 0) { for (int k = 0; k < node.countChildren(); k++) { SSFNode nodeChild = (SSFNode)node.getChildAt(k); String wordLexChild = nodeChild.getLexData(); if (stChunkName.equals("VGNN")) { doVGNNGuessFirstStage(nodeChild); doVGNNGuessSecodStage(nodeChild); } } } } } } secondStagePrunning(story); } void secondStagePrunning(SSFStory story) throws Exception { int count = story.countSentences(); for (int i = 0; i < count; i++) { SSFSentence sent = story.getSentence(i); int ccount = sent.getRoot().getChildCount(); for (int j = 0; j < ccount; j++) { SSFNode node = sent.getRoot().getChild(j); String word = node.getLexData(); if ((dochunkContainPSP(node)) && (dochunkContainkA(node))) { int index = j; ArrayList alfeatures = getGNCPSPnodekA(node); String[] gnc; for (int icount = 0; icount < alfeatures.size(); icount++) { gnc = (String[])alfeatures.get(icount); } if (index + 1 < count) { SSFNode tempNode = sent.getRoot().getChild(index + 1); matchGNC(tempNode, alfeatures); } } } } } void doCategoryMap(SSFNode node) throws Exception { for (int i = 0; i < node.countChildren(); i++) { SSFNode nodeChild = (SSFNode)node.getChildAt(i); FeatureStructure fs = null; String stLexCat = nodeChild.getName(); StringBuilder sb = new StringBuilder(); if (nodeChild.getFeatureStructures() != null) { FeatureStructures fss1 = nodeChild.getFeatureStructures(); for (int iCount = 0; iCount < fss1.countAltFSValues(); iCount++) { fs = fss1.getAltFSValue(iCount); int acount = fs.countAttributes(); for (int l = 0; l < acount; l++) { FeatureAttribute fa = fs.getAttribute(l); if ((fa != null) && (fa.getName().equals("cat"))) { FeatureValue fCat = fa.getAltValue(0); if (((stLexCat.equals("QC")) || (stLexCat.equals("QF")) || (stLexCat.equals("QO")) || (stLexCat.equals("JJC"))) && (fCat != null) && (fCat.toString().equals("adj"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } if ((stLexCat.equals("NNC")) && (fCat != null) && (fCat.toString().equals("n"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } if ((stLexCat.equals("PRPC")) && (fCat != null) && (fCat.toString().equals("pn"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } if ((stLexCat.equals("RBC")) && (fCat != null) && (fCat.toString().equals("adv"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } if ((stLexCat.equals("VAUX")) && (fCat != null) && (fCat.toString().equals("v"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } } } } } else { System.out.println("There is no FeatureStructure"); } } } void doNounGuess(SSFNode node) throws Exception { for (int i = 0; i < node.countChildren(); i++) { SSFNode nodeChild = (SSFNode)node.getChildAt(i); FeatureStructure fs = null; String stLexCat = nodeChild.getName(); StringBuilder sb = new StringBuilder(); if (nodeChild.getFeatureStructures() != null) { FeatureStructures fss1 = nodeChild.getFeatureStructures(); for (int iCount = 0; iCount < fss1.countAltFSValues(); iCount++) { fs = fss1.getAltFSValue(iCount); int acount = fs.countAttributes(); for (int l = 0; l < acount; l++) { FeatureAttribute fa = fs.getAttribute(l); if ((fa != null) && (fa.getName().equals("case"))) { FeatureValue fCase = fa.getAltValue(0); if ((stLexCat.equals("NN")) && (fCase != null) && (fCase.toString().equals("d"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } } } } } else { System.out.println("There is no FeatureStructure"); } } } void doNounGuessForPSP(SSFNode node) throws Exception { for (int i = 0; i < node.countChildren(); i++) { SSFNode nodeChild = (SSFNode)node.getChildAt(i); FeatureStructure fs = null; String stLexCat = nodeChild.getName(); StringBuilder sb = new StringBuilder(); if (nodeChild.getFeatureStructures() != null) { FeatureStructures fss1 = nodeChild.getFeatureStructures(); for (int iCount = 0; iCount < fss1.countAltFSValues(); iCount++) { fs = fss1.getAltFSValue(iCount); int acount = fs.countAttributes(); for (int l = 0; l < acount; l++) { FeatureAttribute fa = fs.getAttribute(l); if ((fa != null) && (fa.getName().equals("case"))) { FeatureValue fv = fa.getAltValue(0); if ((!stLexCat.equals("PSP")) && (fv.toString().equals("o"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } } } } } else { System.out.println("There is no FeatureStructure"); } } } void doVGNNGuessFirstStage(SSFNode nodeChild) throws Exception { FeatureStructure fs = null; String stLexCat = nodeChild.getName(); String stLex = nodeChild.getLexData(); StringBuilder sb = new StringBuilder(); if (nodeChild.getFeatureStructures() != null) { FeatureStructures fss1 = nodeChild.getFeatureStructures(); for (int iCount = 0; iCount < fss1.countAltFSValues(); iCount++) { fs = fss1.getAltFSValue(iCount); int acount = fs.countAttributes(); for (int l = 0; l < acount; l++) { FeatureAttribute fa = fs.getAttribute(l); if ((fa != null) && (fa.getName().equals("root"))) { FeatureValue fv = fa.getAltValue(0); if (((stLexCat.equals("VM")) || (stLexCat.equals("VAUX"))) && (!fv.toString().equals(stLex))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } } } } } else { System.out.println("There is no FeatureStructure"); } } void doVGNNGuessSecodStage(SSFNode nodeChild) throws Exception { FeatureStructure fs = null; String stLexCat = nodeChild.getName(); String stLex = nodeChild.getLexData(); StringBuilder sb = new StringBuilder(); if (nodeChild.getFeatureStructures() != null) { FeatureStructures fss1 = nodeChild.getFeatureStructures(); for (int iCount = 0; iCount < fss1.countAltFSValues(); iCount++) { fs = fss1.getAltFSValue(iCount); int acount = fs.countAttributes(); for (int l = 0; l < acount; l++) { FeatureAttribute fa = fs.getAttribute(l); String stNA = "nA"; if ((fa != null) && (fa.getName().equals("tam"))) { FeatureValue fv = fa.getAltValue(0); if (((stLexCat.equals("VM")) || (stLexCat.equals("VAUX"))) && (fv.toString().equals(stNA))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } } } } } else { System.out.println("There is no FeatureStructure"); } } boolean dochunkContainPSP(SSFNode chunkNode) { boolean isPSP = false; int chunkSize = chunkNode.countChildren(); for (int j = 0; j < chunkNode.countChildren(); j++) { SSFNode chunkChild = (SSFNode)chunkNode.getChildAt(j); if (chunkChild.getName().equals("PSP")) { return true; } } return isPSP; } boolean dochunkContainkA(SSFNode chunkNode) { boolean isKA = false; FeatureStructure fss = null; for (int j = 0; j < chunkNode.countChildren(); j++) { SSFNode chunkChild = (SSFNode)chunkNode.getChildAt(j); if (chunkChild.getFeatureStructures() != null) { FeatureStructures fs = chunkChild.getFeatureStructures(); for (int iCount = 0; iCount < fs.countAltFSValues(); iCount++) { fss = fs.getAltFSValue(iCount); int acount = fss.countAttributes(); String stCat = ""; String stRoot = ""; FeatureAttribute fa = fss.getAttribute(0); if ((fa != null) && (fa.getName().equals("root"))) { stRoot = fa.getAltValue(0).toString(); } fa = fss.getAttribute(1); if ((fa != null) && (fa.getName().equals("cat"))) { stCat = fa.getAltValue(0).toString(); } if ((stCat.equals("psp")) && (stRoot.equals("kA"))) { return true; } } } } return isKA; } ArrayList getGNCPSPnodekA(SSFNode chunkNode) { boolean isPSP = false; FeatureStructure fss = null; int chunkSize = chunkNode.countChildren(); ArrayList alFeatures = new ArrayList(); for (int j = 0; j < chunkNode.countChildren(); j++) { SSFNode chunkChild = (SSFNode)chunkNode.getChildAt(j); if (chunkChild.getFeatureStructures() != null) { FeatureStructures fs = chunkChild.getFeatureStructures(); for (int iCount = 0; iCount < fs.countAltFSValues(); iCount++) { fss = fs.getAltFSValue(iCount); int acount = fss.countAttributes(); String stCat = ""; String stRoot = ""; String[] gnc = { "", "", "" }; FeatureAttribute fa = fss.getAttribute(0); if ((fa != null) && (fa.getName().equals("root"))) { stRoot = fa.getAltValue(0).toString(); } fa = fss.getAttribute(1); if ((fa != null) && (fa.getName().equals("cat"))) { stCat = fa.getAltValue(0).toString(); } if ((stCat.equals("psp")) && (stRoot.equals("kA"))) { String stGender = fss.getAttribute(2).getAltValue(0).toString(); String stNumber = fss.getAttribute(3).getAltValue(0).toString(); String stCase = fss.getAttribute(5).getAltValue(0).toString(); gnc[0] = stGender; gnc[1] = stNumber; gnc[2] = stCase; alFeatures.add(gnc); } } } } return alFeatures; } void matchGNC(SSFNode chunkNode, ArrayList alGNC) throws Exception { FeatureStructure fss = null; StringBuilder sb = new StringBuilder(); for (int j = 0; j < chunkNode.countChildren(); j++) { SSFNode chunkChild = (SSFNode)chunkNode.getChildAt(j); String stNodeCat = chunkChild.getName(); if (stNodeCat.equals("NN")) { if (chunkChild.getFeatureStructures() != null) { FeatureStructures fs = chunkChild.getFeatureStructures(); for (int iCount = 0; iCount < fs.countAltFSValues(); iCount++) { fss = fs.getAltFSValue(iCount); int acount = fss.countAttributes(); String stGen = ""; String stNum = ""; String stCase = ""; String stRoot = ""; FeatureAttribute fa = fss.getAttribute(0); stRoot = fa.getAltValue(0).toString(); fa = fss.getAttribute(2); stGen = fa.getAltValue(0).toString(); fa = fss.getAttribute(3); stNum = fa.getAltValue(0).toString(); fa = fss.getAttribute(5); stCase = fa.getAltValue(0).toString(); for (int i = 0; i < alGNC.size(); i++) { String[] GNC = (String[])alGNC.get(i); if ((GNC[0].equals(stGen)) && (GNC[1].equals(stNum)) && (GNC[2].equals(stCase))) { String featureString = fss.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); chunkChild.setFeatureStructures(fsi); } } } } } } } void getRightHeM(SSFNode node) throws Exception { FeatureStructure fss = null; StringBuilder sb = new StringBuilder(); for (int j = 0; j < node.countChildren(); j++) { SSFNode chunkChild = (SSFNode)node.getChildAt(j); String stNodeCat = chunkChild.getName(); if (chunkChild.getFeatureStructures() != null) { FeatureStructures fs = chunkChild.getFeatureStructures(); for (int iCount = 0; iCount < fs.countAltFSValues(); iCount++) { fss = fs.getAltFSValue(iCount); int acount = fss.countAttributes(); String stGen = ""; String stNum = ""; String stPerson = ""; FeatureAttribute fa = fss.getAttribute(0); fa = fss.getAttribute(2); stGen = fa.getAltValue(0).toString(); fa = fss.getAttribute(3); stNum = fa.getAltValue(0).toString(); fa = fss.getAttribute(4); stPerson = fa.getAltValue(0).toString(); if ((stGen.equals("any")) && (stNum.equals("pl")) && (stPerson.equals("3"))) { String featureString = fss.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); chunkChild.setFeatureStructures(fsi); } } } } } void getCorrectFeatureForVGNFandVGF(SSFNode node, String stChunkName) throws Exception { for (int i = 0; i < node.countChildren(); i++) { SSFNode nodeChild = (SSFNode)node.getChildAt(i); FeatureStructure fs = null; String stLexCat = nodeChild.getName(); StringBuilder sb = new StringBuilder(); if (nodeChild.getFeatureStructures() != null) { FeatureStructures fss1 = nodeChild.getFeatureStructures(); for (int iCount = 0; iCount < fss1.countAltFSValues(); iCount++) { fs = fss1.getAltFSValue(iCount); int acount = fs.countAttributes(); for (int l = 0; l < acount; l++) { FeatureAttribute fa = fs.getAttribute(l); if ((fa != null) && (fa.getName().equals("tam"))) { FeatureValue fSuffix = fa.getAltValue(0); if ((stChunkName.equals("VGNF")) && ((stLexCat.equals("VM")) || (stLexCat.equals("VAUX"))) && (fSuffix != null) && (fSuffix.toString().equals("yA"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } else if ((stChunkName.equals("VGF")) && (stLexCat.equals("VAUX")) && (fSuffix != null) && (fSuffix.toString().equals("yA"))) { String featureString = fs.makeString(); sb.append(featureString + "|"); FeatureStructures fsi = new FeatureStructuresImpl(); fsi.readString(sb.toString()); nodeChild.setFeatureStructures(fsi); } } } } } else { System.out.println("There is no FeatureStructure"); } } } private static class Guess extends Thread { private Socket socket; private int clientNumber; private SSFReading reader; public Guess(Socket socket, int clientNumber, SSFReading reader) { this.socket = socket; this.clientNumber = clientNumber; this.reader = reader; //log.info("New connection with client# " + clientNumber + " at " + socket); } public void run() { try { // Decorate the streams so we can send characters // and not just bytes. Ensure output is flushed // after every newline. BufferedReader in = new BufferedReader( new InputStreamReader(socket.getInputStream())); PrintWriter out = new PrintWriter(socket.getOutputStream(), true); SSFStory story = new SSFStoryImpl(); String input = in.readLine(); File tmpFile = File.createTempFile("GuessMorphHin", ".tmp"); String tmpFileName = tmpFile.getAbsolutePath(); BufferedWriter bw = new BufferedWriter(new FileWriter(tmpFile)); Base64.Decoder decoder = Base64.getDecoder(); byte[] decodedBytes = decoder.decode(input); bw.write(new String(decodedBytes)); bw.close(); story.readFile(tmpFileName); reader.readSentence(story); story.save(tmpFileName, "UTF-8"); FileInputStream fis = new FileInputStream(tmpFileName); byte[] data = new byte[(int) tmpFile.length()]; fis.read(data); fis.close(); out.println(new String(data, "UTF-8")); tmpFile.delete(); } catch (IOException e) { //log.warn("Error handling client# " + clientNumber + ": " + e); } catch (Exception ex) { ex.printStackTrace(); } finally { try { socket.close(); } catch (IOException e) { //log.warn("Couldn't close a socket, what's going on?"); } //log.warn("Connection with client# " + clientNumber + " closed"); } } } }