Skip to content

Commit e7b8280

Browse files
Add files via upload
1 parent 6e12acd commit e7b8280

File tree

3 files changed

+131
-0
lines changed

3 files changed

+131
-0
lines changed

IndexCSVFilters.java

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
package com.solr.index;
2+
3+
import org.apache.solr.client.solrj.SolrClient;
4+
import org.apache.solr.client.solrj.SolrServerException;
5+
import org.apache.solr.client.solrj.impl.HttpSolrClient;
6+
import org.apache.solr.common.SolrInputDocument;
7+
import org.slf4j.Logger;
8+
import org.slf4j.LoggerFactory;
9+
10+
import java.io.*;
11+
import java.nio.charset.StandardCharsets;
12+
import java.text.ParseException;
13+
import java.text.SimpleDateFormat;
14+
import java.util.ArrayList;
15+
import java.util.Collection;
16+
import java.util.TimeZone;
17+
18+
public class IndexCSVFilters {
19+
20+
private SolrClient solrClient;
21+
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
22+
private SimpleDateFormat solrCompatibleSdf,csvCompatibleSdf;
23+
private static final Logger logger = LoggerFactory.getLogger(IndexCSVFilters.class);
24+
25+
private static final String TIME_STAMP = "T00:00:00Z";
26+
private static final String DATE_STAMP = "1970-01-01T";
27+
private static final String DATE_TIME_FIELD = "DateTime";
28+
private static final String TIME_FIELD = "Time";
29+
private static final String ID_FIELD = "id";
30+
private static final String DATE_PART_Z = "Z";
31+
// private static final String DATA_SET_NAME_FIELD = "DataSetName";
32+
public IndexCSVFilters(String solrUrl, String coreName){
33+
String urlString = solrUrl+"/"+coreName;
34+
solrClient = new HttpSolrClient.Builder(urlString)
35+
.withSocketTimeout(0)
36+
.withConnectionTimeout(0)
37+
.build();
38+
solrCompatibleSdf = new SimpleDateFormat("YYYY-MM-dd'T'HH:mm:ss'Z'");
39+
solrCompatibleSdf.setTimeZone(UTC);
40+
csvCompatibleSdf = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss aa");
41+
csvCompatibleSdf.setTimeZone(UTC);
42+
}
43+
public boolean indexCSVFile(File file){
44+
45+
logger.info("Indexing Started for file : "+file.getName());
46+
try(BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
47+
48+
String line = null;
49+
line = bufferedReader.readLine();
50+
String sep = ";";
51+
if(line.contains(",")){
52+
sep = ",";
53+
}
54+
String fieldNames[] = line.split(sep);
55+
Collection<SolrInputDocument> docList = new ArrayList<>();
56+
// DataSetName,TimeStamp,First,Max,Min,Last,ValB,ValA,FilterS,FilterE,FilterT,ValS,ValC
57+
// E8374H231J#Type0,2017-01-09 09:31:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.42,0
58+
// E8374H231J#Type0,2017-01-09 09:32:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.57,0
59+
int cnt =0;
60+
int lineCnt =0;
61+
while ((line = bufferedReader.readLine())!=null){
62+
lineCnt++;
63+
if(line.trim().length()==0){
64+
continue;
65+
}
66+
String data [] = line.trim().split(sep);
67+
if(data.length!=fieldNames.length){
68+
System.out.println(lineCnt+"\t"+line);
69+
continue;
70+
}
71+
SolrInputDocument solrInputDocument = new SolrInputDocument();
72+
for(int index = 0;index<fieldNames.length;index++){
73+
74+
if(index==1 || index==9){
75+
continue;
76+
}
77+
solrInputDocument.addField(fieldNames[index],data[index]);
78+
}
79+
String time = data[1].substring(data[1].indexOf(" ")+1);
80+
81+
solrInputDocument.addField(TIME_FIELD,DATE_STAMP+time+ DATE_PART_Z);
82+
solrInputDocument.addField(DATE_TIME_FIELD,data[1].replace(" ","T")+ DATE_PART_Z);
83+
solrInputDocument.addField(fieldNames[9],data[9]+TIME_STAMP);
84+
solrInputDocument.addField(ID_FIELD,data[1].replace(" ","_")+"_"+data[0]+"_"+data[8]+"_"+data[9]+"_"+data[10]);
85+
docList.add(solrInputDocument);
86+
87+
if(cnt==100000){
88+
cnt = 0;
89+
solrClient.add(docList);
90+
solrClient.commit();
91+
docList.clear();
92+
}else{
93+
cnt++;
94+
}
95+
}
96+
if(cnt!=0){
97+
solrClient.add(docList);
98+
solrClient.commit();
99+
docList.clear();
100+
}
101+
} catch (FileNotFoundException e) {
102+
e.printStackTrace();
103+
logger.error("Error in Indexing file : "+file.getName(),e);
104+
} catch (IOException e) {
105+
e.printStackTrace();
106+
logger.error("Error in Indexing file : "+file.getName(),e);
107+
} catch (SolrServerException e) {
108+
e.printStackTrace();
109+
logger.error("Error in Indexing file : "+file.getName(),e);
110+
}
111+
logger.info("Indexing Completed for file : "+file.getName());
112+
return true;
113+
}
114+
115+
public void optimizeIndex(){
116+
try {
117+
logger.info("Index Optimization Process Start");
118+
solrClient.commit();
119+
solrClient.optimize();
120+
logger.info("Index Optimization Process Completed");
121+
} catch (SolrServerException e) {
122+
e.printStackTrace();
123+
logger.error("Error in Index optimization process",e);
124+
} catch (IOException e) {
125+
e.printStackTrace();
126+
logger.error("Error in Index optimization process",e);
127+
}
128+
129+
}
130+
131+
}

conf.zip

138 KB
Binary file not shown.

csv_samples.zip

13.7 MB
Binary file not shown.

0 commit comments

Comments
 (0)