import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.h2.util.IOUtils;
import pl.edu.icm.model.bwmeta.y.YContentEntry;
import pl.edu.icm.model.bwmeta.y.YContentFile;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.model.transformers.bwmeta.y.BwmetaTransformerConstants;
import pl.edu.icm.synat.api.services.index.fulltext.FulltextIndexService;
import pl.edu.icm.synat.api.services.index.fulltext.query.FulltextSearchQuery;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.SearchOperator;
import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.impl.FieldCriterion;
import pl.edu.icm.synat.api.services.index.fulltext.query.format.FieldRequest;
import pl.edu.icm.synat.api.services.index.fulltext.query.format.ResultsFormat;
import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResult;
import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResults;
import pl.edu.icm.synat.api.services.store.PartType;
import pl.edu.icm.synat.api.services.store.StatelessStore;
import pl.edu.icm.synat.api.services.store.model.AbstractRecordPart;
import pl.edu.icm.synat.api.services.store.model.Record;
import pl.edu.icm.synat.api.services.store.model.RecordId;
import pl.edu.icm.synat.api.services.store.model.TextRecordPart;
import pl.edu.icm.synat.api.services.store.model.batch.operations.AddOrUpdateRecordBinaryPart;
import pl.edu.icm.synat.api.services.store.model.batch.operations.AddOrUpdateRecordTextPart;
import pl.edu.icm.synat.api.services.store.model.batch.operations.BatchOperations;
import pl.edu.icm.synat.application.model.bwmeta.utils.BwmetaConverterUtils;
import pl.edu.icm.synat.console.scripting.utils.ServicesUtils;
import pl.edu.icm.synat.logic.index.publication.CollectionIndexFieldConstants;
// Script entry point: reads a '#'-separated journal list, downloads each journal's
// "about" page, extracts an image URL from it and hands the ISSN / e-ISSN pair plus
// that URL to processRecord().
// Columns used (0-based): 3 = ISSN, 4 = e-ISSN, 7 = URL of the "about" page.
def csvFile = "/home/mkali/journals_list_oct18_2012-1.csv";
String line = "";
String cvsSplitBy = "#";
BufferedReader br = new BufferedReader(new FileReader(csvFile));
try {
    while ((line = br.readLine()) != null) {
        String[] cells = line.split(cvsSplitBy);
        // String.split drops trailing empty cells, so a short row is possible;
        // skip it instead of failing on cells[7].
        if (cells.length < 8) {
            System.out.println("Skipping line with too few columns: " + line);
            continue;
        }
        String issn = cells[3];
        String eissn = cells[4];
        String aboutUrl = cells[7];
        // fetchRemoteContent returns null when the page could not be downloaded.
        String aboutContent = fetchRemoteContent(aboutUrl);
        if (aboutContent != null) {
            // NOTE(review): the two delimiter literals below (and the regex further down)
            // contain raw line breaks and look like they lost their original markup —
            // confirm against the original script. They are kept exactly as found.
            String imgTag = StringUtils.substringBetween(aboutContent, "
", "
");
            //System.out.println(imgTag);
            // substringBetween yields null when the delimiters are not found;
            // Pattern.matcher(null) would throw, so skip such pages.
            if (imgTag == null) {
                continue;
            }
            String imageUrlRegex = "
]+src\\s*=\\s*['\"]([^'\"]+)['\"][^>]*>";
            Pattern imageUrlPattern = Pattern.compile(imageUrlRegex);
            Matcher m = imageUrlPattern.matcher(imgTag);
            if (m.find()) {
                String imageUrl = m.group(1);
                // Prefix scheme-less URLs with "http:" but leave absolute http/https
                // URLs alone (previously "https://..." became "http:https://...").
                if (!imageUrl.startsWith("http://") && !imageUrl.startsWith("https://")) {
                    imageUrl = "http:" + imageUrl;
                }
                processRecord(issn, eissn, imageUrl);
            }
        }
    }
} finally {
    // Always release the file handle, even when processing a row fails.
    br.close();
}
/**
 * Downloads the resource at {@code path} over HTTP and returns its body as text.
 *
 * Best-effort: any failure (malformed URL, I/O error, non-200 status, missing body)
 * is reported on stderr and signalled to the caller by a {@code null} result.
 *
 * @param path absolute URL of the resource to fetch
 * @return the response body decoded with the platform default charset,
 *         or {@code null} if it could not be fetched
 */
private String fetchRemoteContent(String path) {
    GetMethod method = null;
    try {
        // Constructed only to reject malformed URLs before any request is made.
        new URL(path);
        HttpClient httpClient = new HttpClient();
        method = new GetMethod(path);
        method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));
        // Execute the method.
        int statusCode = httpClient.executeMethod(method);
        if (statusCode != HttpStatus.SC_OK) {
            throw new HttpException("Method failed: "
                    + method.getStatusLine());
        }
        // Read the response body.
        byte[] responseBody = method.getResponseBody();
        if (responseBody == null) {
            System.err.println("Empty response body for " + path);
            return null;
        }
        // NOTE(review): decodes with the platform default charset, as before;
        // consider honouring the charset declared by the server.
        return new String(responseBody);
    } catch (Exception e) {
        // Keep the null-on-failure contract, but leave a trace of what went wrong.
        System.err.println("Failed to fetch " + path + ": " + e);
        return null;
    } finally {
        // Release the connection on every path, success or failure.
        if (method != null) {
            method.releaseConnection();
        }
    }
}
/**
 * Downloads the resource at {@code path} over HTTP and returns its raw bytes.
 *
 * Unlike fetchRemoteContent, failures are not swallowed: a malformed URL, an I/O
 * error or a non-200 status propagates to the caller as an exception.
 *
 * @param path absolute URL of the resource to fetch
 * @return the response body as returned by {@code GetMethod.getResponseBody()}
 */
private byte[] fetchFileContent(String path) {
    // Constructed only to reject malformed URLs before any request is made.
    new URL(path);
    HttpClient httpClient = new HttpClient();
    GetMethod method = new GetMethod(path);
    try {
        method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));
        // Execute the method.
        int statusCode = httpClient.executeMethod(method);
        if (statusCode != HttpStatus.SC_OK) {
            throw new HttpException("Method failed: "
                    + method.getStatusLine());
        }
        // Read the response body.
        return method.getResponseBody();
    } finally {
        // Release the connection whether the download succeeded or threw.
        method.releaseConnection();
    }
}
/**
 * Looks up the journal identified by ISSN (preferred) or e-ISSN in the
 * "CollectionIndex" full-text index and, when exactly one journal matches,
 * downloads the image at {@code imageURL} and passes it to updateRecord().
 *
 * An identifier that is empty or equal to "n/a" is treated as absent; when both
 * are absent nothing is searched. Any hit count other than one is only reported.
 *
 * @param issn     print ISSN of the journal, may be empty or "n/a"
 * @param eissn    electronic ISSN, used only when {@code issn} is absent
 * @param imageURL absolute URL of the image to attach
 */
private void processRecord(String issn, String eissn, String imageURL) {
    System.out.println("issn=" + issn + ", eissn=" + eissn);
    // serviceUtils is not declared in this method; presumably it is supplied by the
    // script environment — verify against the console setup.
    FulltextIndexService index = serviceUtils.getService("CollectionIndex", FulltextIndexService.class);
    FieldRequest exidRequest = new FieldRequest(CollectionIndexFieldConstants.FIELD_EXID, true);
    ResultsFormat resultsFormat = new ResultsFormat(exidRequest);

    // Choose the identifier to search by: ISSN first, e-ISSN as the fallback.
    String identifierField = null;
    String identifierValue = null;
    if (StringUtils.isNotEmpty(issn) && !"n/a".equals(issn)) {
        identifierField = "externalIdentifier_#_bwmeta1.id-class.ISSN";
        identifierValue = issn;
    } else if (StringUtils.isNotEmpty(eissn) && !"n/a".equals(eissn)) {
        identifierField = "externalIdentifier_#_bwmeta1.id-class.EISSN";
        identifierValue = eissn;
    }
    if (identifierField == null) {
        return;
    }

    FieldCriterion identifierCriterion = new FieldCriterion(identifierField, identifierValue, SearchOperator.AND);
    FieldCriterion levelCriterion = new FieldCriterion("level", "bwmeta1.level.hierarchy_Journal_Journal", SearchOperator.AND);
    FulltextSearchQuery searchQuery = new FulltextSearchQuery(0, 10, resultsFormat, identifierCriterion, levelCriterion);

    FulltextSearchResults fulltextSearchResults = index.performSearch(searchQuery);
    List hits = fulltextSearchResults.getResults();
    if (hits.size() != 1) {
        System.out.println("Wrong results size: " + hits.size());
        return;
    }

    String docId = hits.get(0).getDocId();
    // The stored part is named after the last path segment of the image URL.
    String remotePath = new URL(imageURL).getFile();
    String fileId = new File(remotePath).getName();
    byte[] data = fetchFileContent(imageURL);
    updateRecord(docId, data, fileId, "image/jpeg");
}
/**
 * Attaches a cover image to the record {@code docId} unless its BWmeta 2.1.0
 * metadata already lists a content file of type "cover".
 *
 * When no cover exists, a new cover entry is added to the metadata and both the
 * rewritten metadata part and the binary image part are stored in one batch.
 *
 * @param docId    identifier of the record to update
 * @param data     raw bytes of the image
 * @param fileId   name used both as the content-file id/location and as the
 *                 name of the binary record part
 * @param mimeType MIME type recorded on the cover entry and in the part's tags
 */
private void updateRecord(String docId, byte[] data, String fileId, String mimeType) {
    StatelessStore store = serviceUtils.getService("Store", StatelessStore.class);
    Record record = store.fetchRecord(new RecordId(docId), "metadata/BWmeta-2.1.0");
    // NOTE(review): guards against the store having no such record or part —
    // confirm whether fetchRecord/getPart can actually return null.
    if (record == null) {
        System.out.println("Record " + docId + " not found in store");
        return;
    }
    AbstractRecordPart part = record.getPart("metadata/BWmeta-2.1.0");
    if (part == null) {
        System.out.println("Record " + docId + " has no metadata/BWmeta-2.1.0 part");
        return;
    }
    YElement element = BwmetaConverterUtils.bwmetaToYElement(((TextRecordPart)part).getTextContent());
    // Look for an existing content file of type "cover"; one is enough.
    boolean hasCover = false;
    for (YContentEntry entry:element.getContents()) {
        if (entry.isFile()) {
            YContentFile contentFile = (YContentFile)entry;
            if ("cover".equals(contentFile.getType())) {
                hasCover = true;
                break;
            }
        }
    }
    if (!hasCover) {
        List locations = new ArrayList();
        locations.add(fileId);
        YContentFile newCover = new YContentFile(fileId, "cover", mimeType, locations);
        element.getContents().add(newCover);
        String newBwmeta = BwmetaConverterUtils.YElementToBwmeta(element, BwmetaTransformerConstants.BWMETA_2_1);
        BatchOperations operationsToExecute = new BatchOperations();
        // Rewrite the metadata part, keeping the tags it already had.
        operationsToExecute.getOperations().add(new AddOrUpdateRecordTextPart(record.getIdentifier(), PartType.SOURCE, "metadata/BWmeta-2.1.0", newBwmeta, part.getTags().toArray(new String[0])));
        // Tag the binary part with the MIME type passed in, so it always agrees
        // with the cover entry above (previously hard-coded to image/jpeg).
        String[] tags = ["type:source", "mime:" + mimeType];
        operationsToExecute.getOperations().add(new AddOrUpdateRecordBinaryPart(record.getIdentifier(), PartType.SOURCE, fileId, new ByteArrayInputStream(data), tags));
        store.executeBatch(operationsToExecute);
        System.out.println("Record " + record.getIdentifier() + " updated");
    }
}