import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import pl.edu.icm.model.bwmeta.y.YDescription;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.model.bwmeta.y.YLanguage;
import pl.edu.icm.model.transformers.bwmeta.y.BwmetaTransformerConstants;
import pl.edu.icm.synat.api.services.index.fulltext.FulltextIndexService;
import pl.edu.icm.synat.api.services.index.fulltext.query.FulltextSearchQuery;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.SearchOperator;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.impl.FieldCriterion;
import pl.edu.icm.synat.api.services.index.fulltext.query.format.FieldRequest;
import pl.edu.icm.synat.api.services.index.fulltext.query.format.ResultsFormat;
import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResult;
import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResults;
import pl.edu.icm.synat.api.services.store.PartType;
import pl.edu.icm.synat.api.services.store.StatelessStore;
import pl.edu.icm.synat.api.services.store.model.AbstractRecordPart;
import pl.edu.icm.synat.api.services.store.model.Record;
import pl.edu.icm.synat.api.services.store.model.RecordId;
import pl.edu.icm.synat.api.services.store.model.TextRecordPart;
import pl.edu.icm.synat.api.services.store.model.batch.operations.AddOrUpdateRecordTextPart;
import pl.edu.icm.synat.api.services.store.model.batch.operations.BatchOperations;
import pl.edu.icm.synat.application.model.bwmeta.utils.BwmetaConverterUtils;
import pl.edu.icm.synat.console.scripting.utils.ServicesUtils;
import pl.edu.icm.synat.logic.index.publication.CollectionIndexFieldConstants;
// Driver: reads a '#'-separated journal list and, for every row, downloads the
// journal's "about" page, extracts its description and stores it as the
// journal's abstract (see fetchRemoteContent, prepareText and updateRecord).
// Columns used (0-based): 3 = ISSN, 4 = EISSN, 7 = base URL of the journal page.
String csvFile = "/home/mkali/journals_list_oct18_2012-1.csv";
BufferedReader br = null;
String line = "";
String cvsSplitBy = "#";
try {
    br = new BufferedReader(new FileReader(csvFile));
    while ((line = br.readLine()) != null) {
        // cells are separated by '#'
        String[] cells = line.split(cvsSplitBy);
        // A row without the URL column cannot be processed; skip it instead of
        // letting an ArrayIndexOutOfBoundsException abort the whole run.
        if (cells.length < 8) {
            System.out.println("Skipping malformed line (expected at least 8 cells): " + line);
            continue;
        }
        String issn = cells[3];
        String eissn = cells[4];
        String aboutUrl = cells[7];
        aboutUrl = aboutUrl + "?detailsPage=aboutThis";
        System.out.println(aboutUrl);
        try {
            String aboutContent = fetchRemoteContent(aboutUrl);
            Document document = Jsoup.parse(aboutContent);
            Elements divs = document.select("div[class=colLeftContentContainer]");
            // Only an unambiguous match (exactly one container) is used.
            if (divs.size() == 1) {
                Element element = divs.get(0);
                String preparedText = prepareText(element);
                if (StringUtils.isNotEmpty(preparedText)) {
                    updateRecord(issn, eissn, preparedText);
                }
            }
        } catch (Exception e) {
            // Best effort: a failure for one journal must not stop the remaining rows.
            System.err.println("Failed to process " + aboutUrl);
            e.printStackTrace();
        }
    }
} catch (IOException e) {
    // Also covers FileNotFoundException, which is an IOException.
    e.printStackTrace();
} finally {
    if (br != null) {
        try {
            br.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Builds the description text from the direct children of the given element.
 *
 * Children are visited in document order. The inner HTML of every child whose
 * class attribute is exactly "springerHTML" is appended, followed by a newline.
 * Scanning stops at the first child that is a "p" element without that class;
 * all other children are skipped.
 *
 * @param div element whose direct children are scanned
 * @return the concatenated HTML fragments, or an empty string when nothing matched
 */
private String prepareText(Element div) {
    StringBuilder text = new StringBuilder();
    for (Element child : div.children()) {
        if ("springerHTML".equals(child.attr("class"))) {
            text.append(child.html());
            text.append("\n");
        } else if ("p".equals(child.tagName())) {
            // A plain paragraph ends the description section.
            break;
        }
    }
    return text.toString();
}
/**
 * Downloads the resource at the given URL with an HTTP GET and returns its body as text.
 *
 * The request is retried up to 3 times by the configured retry handler. The
 * connection is always released, whether or not the request succeeds.
 *
 * @param path absolute URL of the resource to fetch
 * @return the response body decoded with the charset reported for the response,
 *         or an empty string when the response has no body
 * @throws HttpException when the server answers with a status other than 200 OK
 * @throws IOException when the URL is malformed, the transfer fails or the
 *         reported charset is not supported
 */
private String fetchRemoteContent(String path) throws HttpException,
        IOException {
    GetMethod method = null;
    try {
        // Validate the URL up front so a malformed one is reported as an IOException.
        new URL(path);
        HttpClient httpClient = new HttpClient();
        method = new GetMethod(path);
        method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));
        // Execute the method.
        int statusCode = httpClient.executeMethod(method);
        if (statusCode != HttpStatus.SC_OK) {
            throw new HttpException("Method failed: "
                    + method.getStatusLine());
        }
        // Read the response body.
        byte[] responseBody = method.getResponseBody();
        if (responseBody == null) {
            return "";
        }
        // Decode with the charset declared by the response rather than the
        // platform default, so the result does not depend on the machine.
        // NOTE(review): confirm which charset the library assumes when the
        // Content-Type header declares none.
        return new String(responseBody, method.getResponseCharSet());
    } finally {
        if (method != null) {
            method.releaseConnection();
        }
    }
}
/**
 * Adds the given text as an English abstract to the journal identified by ISSN
 * or, when the ISSN is empty, by EISSN.
 *
 * The journal is looked up in the "CollectionIndex" index. The record is only
 * modified when exactly one journal matches and its BWmeta metadata does not
 * already contain a description of type "abstract". When both identifiers are
 * empty nothing is searched or changed.
 *
 * @param issn  print ISSN of the journal; takes precedence when not empty
 * @param eissn electronic ISSN, used only when issn is empty
 * @param data  text to store as the abstract
 */
private void updateRecord(String issn, String eissn, String data) {
    FulltextIndexService index = (FulltextIndexService)serviceUtils.getService("CollectionIndex", FulltextIndexService.class);
    ResultsFormat resultsFormat = new ResultsFormat(new FieldRequest(CollectionIndexFieldConstants.FIELD_EXID, true));

    // Pick the identifier to search by; the ISSN wins over the EISSN.
    String idField;
    String idValue;
    if (StringUtils.isNotEmpty(issn)) {
        idField = "externalIdentifier_#_bwmeta1.id-class.ISSN";
        idValue = issn;
    } else if (StringUtils.isNotEmpty(eissn)) {
        idField = "externalIdentifier_#_bwmeta1.id-class.EISSN";
        idValue = eissn;
    } else {
        return;
    }

    FulltextSearchQuery searchQuery = new FulltextSearchQuery(0, 10, resultsFormat,
            new FieldCriterion(idField, idValue, SearchOperator.AND),
            new FieldCriterion("level", "bwmeta1.level.hierarchy_Journal_Journal", SearchOperator.AND));
    FulltextSearchResults fulltextSearchResults = index.performSearch(searchQuery);
    List<FulltextSearchResult> results = fulltextSearchResults.getResults();
    for (FulltextSearchResult result : results) {
        System.out.println(result.getDocId());
    }

    // Only an unambiguous match is safe to update.
    if (results.size() != 1) {
        System.out.println("Wrong results size: " + results.size());
        return;
    }

    String docId = results.get(0).getDocId();
    StatelessStore store = (StatelessStore)serviceUtils.getService("Store", StatelessStore.class);
    Record record = store.fetchRecord(new RecordId(docId), "metadata/BWmeta-2.1.0");
    AbstractRecordPart part = record.getPart("metadata/BWmeta-2.1.0");
    YElement element = BwmetaConverterUtils.bwmetaToYElement(((TextRecordPart)part).getTextContent());

    boolean hasAbstract = false;
    for (YDescription entry : element.getDescriptions()) {
        if ("abstract".equals(entry.getType())) {
            hasAbstract = true;
            break;
        }
    }
    if (hasAbstract) {
        // Never overwrite or duplicate an existing abstract.
        System.out.println("Record " + record.getIdentifier() + " already has an abstract, skipped");
        return;
    }

    YDescription newAbstract = new YDescription(YLanguage.English, data, "abstract");
    element.getDescriptions().add(newAbstract);
    String newBwmeta = BwmetaConverterUtils.YElementToBwmeta(element, BwmetaTransformerConstants.BWMETA_2_1);
    // Write the metadata part back, keeping the tags of the existing part.
    BatchOperations operationsToExecute = new BatchOperations();
    operationsToExecute.getOperations().add(new AddOrUpdateRecordTextPart(record.getIdentifier(), PartType.SOURCE, "metadata/BWmeta-2.1.0", newBwmeta, part.getTags().toArray(new String[0])));
    store.executeBatch(operationsToExecute);
    System.out.println("Record " + record.getIdentifier() + " updated");
}