|
| 1 | +package com.solr.index; |
| 2 | + |
| 3 | +import org.apache.solr.client.solrj.SolrClient; |
| 4 | +import org.apache.solr.client.solrj.SolrServerException; |
| 5 | +import org.apache.solr.client.solrj.impl.HttpSolrClient; |
| 6 | +import org.apache.solr.common.SolrInputDocument; |
| 7 | +import org.slf4j.Logger; |
| 8 | +import org.slf4j.LoggerFactory; |
| 9 | + |
| 10 | +import java.io.*; |
| 11 | +import java.nio.charset.StandardCharsets; |
| 12 | +import java.text.ParseException; |
| 13 | +import java.text.SimpleDateFormat; |
| 14 | +import java.util.ArrayList; |
| 15 | +import java.util.Collection; |
| 16 | +import java.util.TimeZone; |
| 17 | + |
| 18 | +public class IndexCSVFilters { |
| 19 | + |
| 20 | + private SolrClient solrClient; |
| 21 | + private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); |
| 22 | + private SimpleDateFormat solrCompatibleSdf,csvCompatibleSdf; |
| 23 | + private static final Logger logger = LoggerFactory.getLogger(IndexCSVFilters.class); |
| 24 | + |
| 25 | + private static final String TIME_STAMP = "T00:00:00Z"; |
| 26 | + private static final String DATE_STAMP = "1970-01-01T"; |
| 27 | + private static final String DATE_TIME_FIELD = "DateTime"; |
| 28 | + private static final String TIME_FIELD = "Time"; |
| 29 | + private static final String ID_FIELD = "id"; |
| 30 | + private static final String DATE_PART_Z = "Z"; |
| 31 | +// private static final String DATA_SET_NAME_FIELD = "DataSetName"; |
| 32 | + public IndexCSVFilters(String solrUrl, String coreName){ |
| 33 | + String urlString = solrUrl+"/"+coreName; |
| 34 | + solrClient = new HttpSolrClient.Builder(urlString) |
| 35 | + .withSocketTimeout(0) |
| 36 | + .withConnectionTimeout(0) |
| 37 | + .build(); |
| 38 | + solrCompatibleSdf = new SimpleDateFormat("YYYY-MM-dd'T'HH:mm:ss'Z'"); |
| 39 | + solrCompatibleSdf.setTimeZone(UTC); |
| 40 | + csvCompatibleSdf = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss aa"); |
| 41 | + csvCompatibleSdf.setTimeZone(UTC); |
| 42 | + } |
| 43 | + public boolean indexCSVFile(File file){ |
| 44 | + |
| 45 | + logger.info("Indexing Started for file : "+file.getName()); |
| 46 | + try(BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) { |
| 47 | + |
| 48 | + String line = null; |
| 49 | + line = bufferedReader.readLine(); |
| 50 | + String sep = ";"; |
| 51 | + if(line.contains(",")){ |
| 52 | + sep = ","; |
| 53 | + } |
| 54 | + String fieldNames[] = line.split(sep); |
| 55 | + Collection<SolrInputDocument> docList = new ArrayList<>(); |
| 56 | +// DataSetName,TimeStamp,First,Max,Min,Last,ValB,ValA,FilterS,FilterE,FilterT,ValS,ValC |
| 57 | +// E8374H231J#Type0,2017-01-09 09:31:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.42,0 |
| 58 | +// E8374H231J#Type0,2017-01-09 09:32:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.57,0 |
| 59 | + int cnt =0; |
| 60 | + int lineCnt =0; |
| 61 | + while ((line = bufferedReader.readLine())!=null){ |
| 62 | + lineCnt++; |
| 63 | + if(line.trim().length()==0){ |
| 64 | + continue; |
| 65 | + } |
| 66 | + String data [] = line.trim().split(sep); |
| 67 | + if(data.length!=fieldNames.length){ |
| 68 | + System.out.println(lineCnt+"\t"+line); |
| 69 | + continue; |
| 70 | + } |
| 71 | + SolrInputDocument solrInputDocument = new SolrInputDocument(); |
| 72 | + for(int index = 0;index<fieldNames.length;index++){ |
| 73 | + |
| 74 | + if(index==1 || index==9){ |
| 75 | + continue; |
| 76 | + } |
| 77 | + solrInputDocument.addField(fieldNames[index],data[index]); |
| 78 | + } |
| 79 | + String time = data[1].substring(data[1].indexOf(" ")+1); |
| 80 | + |
| 81 | + solrInputDocument.addField(TIME_FIELD,DATE_STAMP+time+ DATE_PART_Z); |
| 82 | + solrInputDocument.addField(DATE_TIME_FIELD,data[1].replace(" ","T")+ DATE_PART_Z); |
| 83 | + solrInputDocument.addField(fieldNames[9],data[9]+TIME_STAMP); |
| 84 | + solrInputDocument.addField(ID_FIELD,data[1].replace(" ","_")+"_"+data[0]+"_"+data[8]+"_"+data[9]+"_"+data[10]); |
| 85 | + docList.add(solrInputDocument); |
| 86 | + |
| 87 | + if(cnt==100000){ |
| 88 | + cnt = 0; |
| 89 | + solrClient.add(docList); |
| 90 | + solrClient.commit(); |
| 91 | + docList.clear(); |
| 92 | + }else{ |
| 93 | + cnt++; |
| 94 | + } |
| 95 | + } |
| 96 | + if(cnt!=0){ |
| 97 | + solrClient.add(docList); |
| 98 | + solrClient.commit(); |
| 99 | + docList.clear(); |
| 100 | + } |
| 101 | + } catch (FileNotFoundException e) { |
| 102 | + e.printStackTrace(); |
| 103 | + logger.error("Error in Indexing file : "+file.getName(),e); |
| 104 | + } catch (IOException e) { |
| 105 | + e.printStackTrace(); |
| 106 | + logger.error("Error in Indexing file : "+file.getName(),e); |
| 107 | + } catch (SolrServerException e) { |
| 108 | + e.printStackTrace(); |
| 109 | + logger.error("Error in Indexing file : "+file.getName(),e); |
| 110 | + } |
| 111 | + logger.info("Indexing Completed for file : "+file.getName()); |
| 112 | + return true; |
| 113 | + } |
| 114 | + |
| 115 | + public void optimizeIndex(){ |
| 116 | + try { |
| 117 | + logger.info("Index Optimization Process Start"); |
| 118 | + solrClient.commit(); |
| 119 | + solrClient.optimize(); |
| 120 | + logger.info("Index Optimization Process Completed"); |
| 121 | + } catch (SolrServerException e) { |
| 122 | + e.printStackTrace(); |
| 123 | + logger.error("Error in Index optimization process",e); |
| 124 | + } catch (IOException e) { |
| 125 | + e.printStackTrace(); |
| 126 | + logger.error("Error in Index optimization process",e); |
| 127 | + } |
| 128 | + |
| 129 | + } |
| 130 | + |
| 131 | +} |
0 commit comments