/* REQUIRES the MetaMap Transfer Java libraries to compile javac -classpath "/data/umls/mmtx/nls/mmtx/lib/nlpProject.jar: \ /data/umls/mmtx/nls/mmtx/lib/mmtx.jar:/data/umls/mmtx/nls/mmtx/lib/mmtxProject.jar: \ /need_location/mysql-connector-java-3.0.9-stable-bin.jar \ $CLASSPATH" processGeo.java to run cd /data/umls/mmtx/nls setenv MMTX_PATH `pwd` source ${MMTX_PATH}/mmtx/config/cshrc cd /data/umls/mmtx/nls/mmtx/doc/userDoc java -Xmx512m -classpath "/data/umls/mmtx/nls/mmtx/lib/nlpProject.jar: \ /data/umls/mmtx/nls/mmtx/lvg2001/lib/jdbcDrivers/mm.mysql-2.0.6/mm.mysql-2.0.6.jar: \ /data/umls/mmtx/nls/mmtx/lib/mmtx.jar: \ /data/umls/mmtx/nls/mmtx/lib/mmtxProject.jar:$CLASSPATH" processGeo & to test in mysql select distinct a.gse, a.sui, b.STR from geo.gse_desc_sui a inner join umls.MRCON b where a.sui = b.SUI; Important note: The MMTX routines may unfortunately hang on long text items. This program only iterates on those GEO objects (samples, series, data sets) that do not already have mappings. This is done so that if the user finds the program hanging, he/she can stop the program and restart it, and this program will automatically move on to the next GEO object. This program accomplishes this by storing a NULL mapping first for the currently processed GEO object, so that if the program is terminated, it will not revisit the failed GEO object. Because of this, users will need to remove the NULL mappings after this program has completed. */ import java.io.*; import java.util.*; import java.sql.*; import java.math.*; import java.text.*; import java.net.*; import java.lang.*; import java.util.Iterator; import java.util.List; import java.util.Vector; import gov.nih.nlm.nls.mmtx.*; import gov.nih.nlm.nls.utils.*; import gov.nih.nlm.nls.nlp.textfeatures.*; /** * processGeo.java -- program that maps GEO annotations stored in MySQL tables * to UMLS concepts. Based on the MmtxApiSample.java -- MMTx API Sample Program * by Willie Rogers * * Created: Fri Oct 1 * By Atul Butte */ public class processGeo { static String db_connection = "jdbc:mysql://dblocation.org/geo"; static String db_user = "xxxx"; static String db_password = "xxxx"; static Connection dbConn = null; public static void main( String[] argv ) throws Exception { String parms[] = { "--freeText", "--no_acros_abbrs_only", "--no_derivational_variants", "--stop_large_n", "--filterToTarget", " --truncate_candidates_mappings", "--prefer_multiple_concepts" }; MMTxAPI mmtxApi = new MMTxAPI( parms ); try { Class.forName( "org.gjt.mm.mysql.Driver" ).newInstance(); } catch( Exception e ) { System.err.println( "Unable to load mm.mysql driver." ); e.printStackTrace( ); } try { dbConn = DriverManager.getConnection( db_connection, db_user, db_password ); } catch (Throwable t) { t.printStackTrace(); return; } String geo_table[] = { "gds_title", "gse_title", "gse_desc", "gsm_title", "gsm_desc", "gsm_source", "gsm_keyword" }; String geo_key[] = { "gds", "gse", "gse", "gsm", "gsm", "gsm", "gsm" }; String geo_text[] = { "title", "title", "description", "title", "description", "source", "keyword" }; boolean geo_document[] = { false, false, true, false, true, false, false }; Statement stmt = dbConn.createStatement( ); for( int i = 0; i < geo_table.length; i++ ) { stmt.executeUpdate( "drop table if exists " + geo_table[i] + "_sui_new " ); stmt.executeUpdate( "create table if not exists " + geo_table[i] + "_sui_new " + " (" + geo_key[i] + " int, sui varchar(8), phrase varchar(64), score int, index(" + geo_key[i] + "), index(sui))" ); } PreparedStatement s = null; s = dbConn.prepareStatement( "create table if not exists gse_desc_sui_new (gse int, sui char(10), index(gse), index(sui))" ); s.executeUpdate( ); PreparedStatement insert_gse_desc_sui = dbConn.prepareStatement( "insert into gse_desc_sui_new set gse = ?, sui = ?" ); s = dbConn.prepareStatement( "create table if not exists gsm_desc_sui_new (gsm int, sui char(10), index(gsm), index(sui))" ); s.executeUpdate( ); PreparedStatement insert_gsm_desc_sui = dbConn.prepareStatement( "insert into gsm_desc_sui_new set gsm = ?, sui = ?" ); s = dbConn.prepareStatement( "create table if not exists gse_title_sui_new (gse int, sui char(10), index(gse), index(sui))" ); s.executeUpdate( ); PreparedStatement insert_gse_title_sui = dbConn.prepareStatement( "insert into gse_title_sui_new set gse = ?, sui = ?" ); s = dbConn.prepareStatement( "create table if not exists gsm_title_sui_new (gsm int, sui char(10), index(gsm), index(sui))" ); s.executeUpdate( ); PreparedStatement insert_gsm_title_sui = dbConn.prepareStatement( "insert into gsm_title_sui_new set gsm = ?, sui = ?" ); s = dbConn.prepareStatement( "create table if not exists gsm_keyword_sui_new (gsm int, sui char(10), index(gsm), index(sui))" ); s.executeUpdate( ); PreparedStatement insert_gsm_keyword_sui = dbConn.prepareStatement( "insert into gsm_keyword_sui_new set gsm = ?, sui = ?" ); s = dbConn.prepareStatement( "create table if not exists gsm_source_sui_new (gsm int, sui char(10), index(gsm), index(sui))" ); s.executeUpdate( ); PreparedStatement insert_gsm_source_sui = dbConn.prepareStatement( "insert into gsm_source_sui_new set gsm = ?, sui = ?" ); for( int i = 0; i < geo_table.length; i++ ) { System.out.print( "Working on " + geo_table[i] ); if( geo_document[i] ) System.out.println( " documents" ); else System.out.println( " strings" ); try { String input = null; int index = 0; String query = "select distinct a." + geo_key[i] + ", a." + geo_text[i] + " from " + geo_table[i] + " as a left join " + geo_table[i] + "_sui_new as b on " + " a." + geo_key[i] + " = b." + geo_key[i] + " where b." + geo_key[i] + " is null order by a." + geo_key[i]; ResultSet rset = stmt.executeQuery( query ); query = "insert into " + geo_table[i] + "_sui_new " + " set " + geo_key[i] + " = ?, sui = ?, phrase = ?, score = ? "; PreparedStatement insert = dbConn.prepareStatement( query ); while( rset.next( ) ) { System.out.println( DateFormat.getDateTimeInstance( ).format( new java.util.Date( ) ) ); try { index = rset.getInt( 1 ); input = rset.getString( 2 ); System.out.print( geo_table[i] + " " + index + ": " ); System.out.println( input ); System.out.flush( ); insert.setInt( 1, index ); insert.setNull( 2, java.sql.Types.CHAR ); insert.setNull( 3, java.sql.Types.CHAR ); insert.setNull( 4, java.sql.Types.INTEGER ); insert.executeUpdate( ); System.gc(); if( input == null ) continue; Iterator phraseIterator = null; if( geo_document[i] ) { gov.nih.nlm.nls.nlp.textfeatures.Document doc = new Document( new StringBuffer( input ) ); mmtxApi.processDocument( doc ); phraseIterator = doc.getPhrases().iterator(); } else { gov.nih.nlm.nls.nlp.textfeatures.Sentence sent = mmtxApi.processString( input, false ); phraseIterator = sent.getPhrases().iterator(); } while (phraseIterator.hasNext()) { Phrase phrase = (Phrase)phraseIterator.next(); System.out.println( phrase ); Vector list = phrase.getCandidateList(); if( list == null ) continue; Iterator candidateIterator = list.iterator(); if( candidateIterator == null ) continue; while (candidateIterator.hasNext()) { Candidate c = (Candidate) candidateIterator.next( ); if( c == null ) break; insert.setInt( 1, index ); insert.setString( 2, c.getSUI( ) ); insert.setString( 3, phrase.getOriginalString( ) ); insert.setInt( 4, c.getFinalScore( ) ); insert.executeUpdate( ); } list = phrase.getFinalCandidateList(); if( list == null ) continue; candidateIterator = list.iterator(); if( candidateIterator == null ) continue; while (candidateIterator.hasNext()) { Candidate c = (Candidate) candidateIterator.next( ); if( c == null ) break; } } System.gc(); } catch( Throwable e1 ) { System.err.print( "Error dealing with " + geo_table[i] + " " + index ); if( input != null ) { System.err.print( ": \"" + input + "\"" ); } System.err.print( "\n" ); e1.printStackTrace(); } } } catch (Exception e) { System.err.println("Major exception: " + e.toString() ); e.printStackTrace(); } } mmtxApi.finalize(); } }