public class OaiFeedMapper
extends org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.NullWritable>
| Constructor and Description |
|---|
| OaiFeedMapper() |
| Modifier and Type | Method and Description |
|---|---|
| boolean | checkRecordFields(com.google.common.collect.Multimap<String,String> recordFields, org.apache.hadoop.mapreduce.Mapper.Context context, String recordKey, String recordBody) |
| protected void | cleanup(org.apache.hadoop.mapreduce.Mapper.Context context) |
| protected com.mongodb.DBObject | createBasicObject(String oaiID, String record, com.google.common.collect.Multimap<String,String> recordProperties, org.apache.hadoop.mapreduce.Mapper.Context context) |
| org.bson.types.Binary | createCompressRecord(org.apache.hadoop.mapreduce.Mapper.Context context, String recordKey, String recordBody) |
| com.mongodb.client.MongoCollection<com.mongodb.DBObject> | getCollection() |
| com.mongodb.client.MongoCollection<com.mongodb.DBObject> | getDiscardedCollection() |
| String | getDuplicateXPath() |
| Date | getFeedDate() |
| MongoSetCollection | getMongoSetCollection() |
| OAIConfiguration | getOaiConfiguration() |
| OAIConfigurationStringReader | getOaiConfigurationReader() |
| String[] | getParseDatePatterns() |
| boolean | isSkipDuplicates() |
| protected void | map(org.apache.hadoop.io.Text key, org.apache.hadoop.io.Text value, org.apache.hadoop.mapreduce.Mapper.Context context) |
| protected Date | parseDate(String date) |
| void | setCollection(com.mongodb.client.MongoCollection<com.mongodb.DBObject> collection) |
| void | setDiscardedCollection(com.mongodb.client.MongoCollection<com.mongodb.DBObject> discardedCollection) |
| void | setDuplicateXPath(String duplicateXPath) |
| void | setFeedDate(Date feedDate) |
| void | setMongoSetCollection(MongoSetCollection mongoSetCollection) |
| void | setOaiConfiguration(OAIConfiguration oaiConfiguration) |
| void | setOaiConfigurationReader(OAIConfigurationStringReader oaiConfigurationReader) |
| void | setParseDatePatterns(String[] parseDatePatterns) |
| void | setSkipDuplicates(boolean skipDuplicates) |
| protected void | setup(org.apache.hadoop.mapreduce.Mapper.Context context) |
protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
throws UnknownHostException
Overrides: setup in class org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.NullWritable>
Throws: UnknownHostException

protected void map(org.apache.hadoop.io.Text key,
org.apache.hadoop.io.Text value,
org.apache.hadoop.mapreduce.Mapper.Context context)
throws IOException,
InterruptedException
Overrides: map in class org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.NullWritable>
Throws: IOException, InterruptedException

public boolean checkRecordFields(com.google.common.collect.Multimap<String,String> recordFields, org.apache.hadoop.mapreduce.Mapper.Context context, String recordKey, String recordBody)
protected com.mongodb.DBObject createBasicObject(String oaiID, String record, com.google.common.collect.Multimap<String,String> recordProperties, org.apache.hadoop.mapreduce.Mapper.Context context)
public org.bson.types.Binary createCompressRecord(org.apache.hadoop.mapreduce.Mapper.Context context,
String recordKey,
String recordBody)
protected void cleanup(org.apache.hadoop.mapreduce.Mapper.Context context)
throws IOException,
InterruptedException
Overrides: cleanup in class org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text,org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.NullWritable>
Throws: IOException, InterruptedException

public com.mongodb.client.MongoCollection<com.mongodb.DBObject> getCollection()
public void setCollection(com.mongodb.client.MongoCollection<com.mongodb.DBObject> collection)
public com.mongodb.client.MongoCollection<com.mongodb.DBObject> getDiscardedCollection()
public void setDiscardedCollection(com.mongodb.client.MongoCollection<com.mongodb.DBObject> discardedCollection)
public OAIConfigurationStringReader getOaiConfigurationReader()
public void setOaiConfigurationReader(OAIConfigurationStringReader oaiConfigurationReader)
public OAIConfiguration getOaiConfiguration()
public void setOaiConfiguration(OAIConfiguration oaiConfiguration)
public Date getFeedDate()
public void setFeedDate(Date feedDate)
public MongoSetCollection getMongoSetCollection()
public void setMongoSetCollection(MongoSetCollection mongoSetCollection)
public String getDuplicateXPath()
public void setDuplicateXPath(String duplicateXPath)
public boolean isSkipDuplicates()
public void setSkipDuplicates(boolean skipDuplicates)
public String[] getParseDatePatterns()
public void setParseDatePatterns(String[] parseDatePatterns)
Copyright © 2023. All rights reserved.