import pl.edu.icm.synat.api.services.store.StatelessStore import pl.edu.icm.synat.api.services.store.model.RecordConditions import pl.edu.icm.synat.process.common.enrich.impl.sonca.SoncaMetadataEnricher import org.springframework.web.client.RestTemplate import pl.edu.icm.model.bwmeta.y.YContentEntry; import pl.edu.icm.model.bwmeta.y.YContentFile; import pl.edu.icm.model.bwmeta.y.YDescription; import pl.edu.icm.model.bwmeta.y.YElement; import pl.edu.icm.model.bwmeta.y.YExportable; import pl.edu.icm.model.bwmeta.y.YLanguage; import pl.edu.icm.model.transformers.bwmeta.y.BwmetaTransformerConstants; import pl.edu.icm.synat.api.services.index.fulltext.FulltextIndexService; import pl.edu.icm.synat.api.services.index.fulltext.query.FulltextSearchQuery; import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.SearchOperator; import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.impl.FieldCriterion; import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.impl.FieldRangeCriterion; import pl.edu.icm.synat.api.services.index.fulltext.query.criteria.impl.BooleanCriterion; import pl.edu.icm.synat.api.services.index.fulltext.query.format.FieldRequest; import pl.edu.icm.synat.api.services.index.fulltext.query.format.ResultsFormat; import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResult; import pl.edu.icm.synat.api.services.index.fulltext.result.FulltextSearchResults; import pl.edu.icm.synat.api.services.index.fulltext.result.ResultField; import pl.edu.icm.synat.api.services.store.model.RecordId; import pl.edu.icm.synat.process.common.repository.DocumentRepository; import pl.edu.icm.synat.process.common.repository.DefaultDocumentRepositoryBuilder; import pl.edu.icm.synat.process.common.model.api.Document; import pl.edu.icm.synat.process.common.model.api.NativeDocument; import org.apache.commons.io.IOUtils import org.apache.commons.io.filefilter.WildcardFileFilter; import org.apache.commons.io.FileUtils; import java.io.File; 
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import pl.edu.icm.model.bwmeta.y.YContentFile;
import pl.edu.icm.model.bwmeta.y.YCurrent;
import pl.edu.icm.model.bwmeta.y.YDate;
import pl.edu.icm.model.bwmeta.y.YDescription;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.model.bwmeta.y.YLanguage;
import pl.edu.icm.model.bwmeta.y.YName;
import pl.edu.icm.model.bwmeta.y.YStructure;
import pl.edu.icm.model.bwmeta.y.YTagList;
import pl.edu.icm.model.bwmeta.y.constants.DateTypes;
import pl.edu.icm.model.bwmeta.y.constants.DescriptionTypes;
import pl.edu.icm.model.bwmeta.y.constants.FileTypes;
import pl.edu.icm.model.bwmeta.y.constants.HierarchyWithLevelIds;
import pl.edu.icm.model.bwmeta.y.constants.TagTypes;
import pl.edu.icm.synat.application.exception.GeneralBusinessException;
import pl.edu.icm.synat.logic.model.general.VideoConstants;
import pl.edu.icm.synat.logic.model.utils.YModelUtils;
import pl.edu.icm.model.bwmeta.y.constants.MiscellaneousConstants
import pl.edu.icm.model.transformers.bwmeta.y.BwmetaTransformerConstants
import pl.edu.icm.synat.api.services.store.PartType
import pl.edu.icm.synat.api.services.store.StatelessStore
import pl.edu.icm.synat.api.services.store.StoreClient
import pl.edu.icm.synat.api.services.store.model.RecordId
import pl.edu.icm.synat.api.services.store.model.batch.BatchBuilder
import pl.edu.icm.synat.api.services.store.model.batch.impl.DefaultStoreClient
import pl.edu.icm.synat.application.model.bwmeta.utils.BwmetaConverterUtils
import pl.edu.icm.synat.logic.services.repository.constants.RepositoryStoreConstants
import pl.edu.icm.synat.logic.model.general.VideoConstants

// NOTE: store, transliterationRequired and path are deliberately assigned without
// a type or 'def' so that they live in the script binding and stay visible inside
// the methods declared below.
store = serviceUtils.getService('Store', StatelessStore.class)

// this should be set to true if transliteration is required
transliterationRequired = false

// TODO change this directory
// directory to read data to commit
path = "/tmp/IcmTv/"

final String propertiesSuffix = ".properties"

// listFiles() returns null when the directory is missing or unreadable.
File[] candidates = new File(path).listFiles()
if (candidates == null) {
    System.out.println("Directory " + path + " does not exist or cannot be read; nothing to commit")
} else {
    for (File file : candidates) {
        String fileName = file.getName()
        // Match the suffix only (not any name merely containing ".properties") and
        // keep the whole base name as the id, so ids containing dots are not cut short.
        if (file.isFile() && fileName.endsWith(propertiesSuffix) && fileName.length() > propertiesSuffix.length()) {
            String id = fileName.substring(0, fileName.length() - propertiesSuffix.length())
            String speechToText = preparedTranscription(id)
            storeFileMetadata(id, speechToText)
        }
    }
}

/**
 * Loads "<id>.properties", reading the file as UTF-8.
 *
 * @param id path of the properties file without the ".properties" extension
 * @return the loaded properties; empty when the file could not be read
 */
def prepareProperties(id) {
    Properties prop = new Properties()
    try {
        // withReader closes the reader even when load() fails.
        new File(id + ".properties").withReader("UTF-8") { reader -> prop.load(reader) }
    } catch (IOException ex) {
        ex.printStackTrace() // TODO
    }
    return prop
}

/**
 * Returns the value of the given property, re-decoded from ISO-8859-1 bytes as UTF-8.
 *
 * FIXME we had problems with text formats, this worked for us.
 * NOTE(review): prepareProperties already reads the file as UTF-8, so this second
 * decoding step looks like it is only correct for double-encoded input files --
 * confirm against real data before changing it.
 *
 * @param name property key
 * @param prop properties to read from
 * @return the re-decoded value, or null when the property is not set
 */
def prepareProperty(name, prop) {
    String raw = prop.getProperty(name)
    if (raw == null) {
        return null
    }
    return new String(raw.getBytes("ISO-8859-1"), "UTF-8")
}

/**
 * Returns the decoded value of a property, or null when it is missing or empty.
 */
def optionalProperty(name, prop) {
    String raw = prop.getProperty(name)
    if (raw == null || raw.isEmpty()) {
        return null
    }
    return prepareProperty(name, prop)
}

/**
 * Reads the transcription file "<path><id>.text" as UTF-8.
 *
 * @param id identifier of the processed item (file base name)
 * @return the file content, or null when the file is missing or cannot be read;
 *         storeFileMetadata reports the missing transcription later
 */
def preparedTranscription(id) {
    File transcription = new File(path + id + ".text")
    if (!transcription.isFile()) {
        // this will be displayed later in the script
        return null
    }
    try {
        return transcription.withReader("UTF-8") { reader -> IOUtils.toString(reader) }
    } catch (IOException ex) {
        System.err.println("Cannot read transcription file " + transcription + ": " + ex.getMessage())
        return null
    }
}

/**
 * Builds the BWMETA element for one item from "<path><id>.properties" and stores it,
 * together with the optional transcription, under the record
 * "bwmeta.element.pioniertv-<id>". An already existing record with that id is deleted first.
 *
 * All validation and metadata preparation happens before the existing record is
 * deleted, so a failure does not leave the store without the previous record.
 *
 * @param id           identifier of the processed item (file base name)
 * @param speechToText transcription text, or null when there is none
 * @return the part path under which the BWMETA metadata was stored
 * @throws NullPointerException  when transliterationRequired is true and speechToText is null
 * @throws IllegalStateException when no property could be loaded for the item
 */
def storeFileMetadata(id, speechToText) {
    String elementId = "bwmeta.element.pioniertv-" + id
    RecordId recordId = new RecordId(elementId)

    if (null == speechToText && true == transliterationRequired) {
        throw new NullPointerException("transliterationRequired is true but the :"+recordId+" has not one. File "+id+".text is required.")
    }

    Properties prop = prepareProperties(path + id)
    if (prop.isEmpty()) {
        throw new IllegalStateException("No metadata could be loaded from " + path + id + ".properties; record " + recordId + " was left untouched.")
    }

    YElement yElement = new YElement()
    yElement.setId(elementId)

    String title = optionalProperty("title", prop)
    if (title != null) {
        yElement.addName(new YName(title))
    }

    // An empty description is still stored; only an absent one is skipped.
    String description = prepareProperty("description", prop)
    if (description != null) {
        yElement.addDescription(new YDescription(YLanguage.Undetermined, description, DescriptionTypes.DS_SUMMARY))
    }

    String keywords = optionalProperty("key-words", prop)
    if (keywords != null) {
        // Comma separated list; surrounding whitespace and empty entries are dropped.
        List keywordTags = keywords.split(",").collect { it.trim() }.findAll { !it.isEmpty() }
        if (!keywordTags.isEmpty()) {
            yElement.addTagList(new YTagList().setType(TagTypes.TG_KEYWORD).setValues(keywordTags))
        }
    }

    // property name -> element attribute name
    ["supplier": "copyright-holder", "duration": "duration", "language": "language"].each { propertyName, attributeName ->
        String value = optionalProperty(propertyName, prop)
        if (value != null) {
            yElement.addAttribute(attributeName, value)
        }
    }

    String transliterationPartId = UUID.randomUUID().toString()

    YContentFile videoUrlContent = new YContentFile(id + "_" + transliterationPartId,
            VideoConstants.VIDEO_URL_FILE_TYPE, "pionierFormat", Collections.singletonList(id))
    yElement.addContent(videoUrlContent)

    // NOTE(review): this content entry is added even when there is no transcription,
    // in which case no part with transliterationPartId is stored -- confirm this is intended.
    // NOTE(review): "plain/text" is kept as in the original; the usual MIME type would be
    // "text/plain" -- verify what consumers of these records expect.
    YContentFile speechToTextContent = new YContentFile(id + "_text_" + transliterationPartId,
            FileTypes.FT_FULL_TEXT, "plain/text", Collections.singletonList(transliterationPartId))
    speechToTextContent.addLanguage(YLanguage.Polish)
    yElement.addContent(speechToTextContent)

    YCurrent currentLevel = new YCurrent().setLevel(VideoConstants.VIDEO_CURRENT_LEVEL)
    yElement.addStructure(new YStructure()
            .setHierarchy(HierarchyWithLevelIds.EXT_HIERARCHY_PUBLICATION)
            .setCurrent(currentLevel))

    def fileData = BwmetaConverterUtils.YElementToBwmeta(yElement, BwmetaTransformerConstants.BWMETA_2_1)
    def mime = MiscellaneousConstants.BWMETA_MIME_TYPE

    final String BWMETA_PATH = "metadata/bwmeta-2.1.0"
    String[] tags = [RepositoryStoreConstants.TAG_PREFIX_MAIN_METADATA + BWMETA_PATH] as String[]

    // Everything is prepared: replace the stored record.
    StoreClient storeClient = new DefaultStoreClient(store)
    BatchBuilder batchBuilder = storeClient.createBatchBuilder()
    batchBuilder.deleteRecord(recordId)
    batchBuilder.execute()

    batchBuilder.addRecord(recordId)
    batchBuilder.onRecord(recordId).addTags(tags)

    if (null != speechToText) {
        // Encode explicitly as UTF-8 instead of relying on the platform default charset.
        batchBuilder.onRecord(recordId).addOrUpdateBinaryPart(PartType.SOURCE, transliterationPartId,
                IOUtils.toInputStream(speechToText, "UTF-8"),
                RepositoryStoreConstants.TAG_PREFIX_MIME + "plain/text", "type:plainText")
    } else {
        System.out.println("record: "+recordId+" has not the transcription file")
    }

    batchBuilder.onRecord(recordId).addOrUpdateBinaryPart(PartType.SOURCE, BWMETA_PATH,
            IOUtils.toInputStream(fileData, "UTF-8"), RepositoryStoreConstants.TAG_PREFIX_MIME + mime)
    batchBuilder.execute()

    System.out.println("Saved: "+recordId)
    return BWMETA_PATH
}