hadoop-0.21.0-streaming.jar \
  -Dmapreduce.job.output.key.class=org.apache.hadoop.io.DoubleWritable \
  -files /home/shivani/research/toolkit/mathouttuts/nearestneighbor/code/IdentityMapper.R#file1 \
  -input datain/comparedata \
  -output dataout5 \
  -mapper file1 \
  -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
  -verbose
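The mapper shipped as file1 is IdentityMapper.R. Assuming it is literally an identity mapper that copies stdin to stdout and nothing more, a minimal sketch of such a script looks like this:

    #!/usr/bin/env Rscript
    # Streaming identity mapper sketch: copy every input line to stdout unchanged.
    con <- file("stdin", open = "r")
    while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
      cat(line, "\n", sep = "")
    }
    close(con)

For -mapper file1 to execute such a script directly, it needs a shebang line, execute permission, and Rscript available on every task node; that is an assumption about the setup, not something the logs below confirm.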
The output stream is shown below. The failure is in the mapper itself, more specifically in the TextOutputReader. I am not sure how to fix this. The logs are attached below:
11/04/13 13:22:15 INFO security.Groups: Group mapping impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping; cacheTimeout=300000
11/04/13 13:22:15 WARN conf.Configuration: mapred.used.genericoptionsparser is deprecated. Instead, use mapreduce.client.genericoptionsparser.used
STREAM: addTaskEnvironment=
STREAM: shippedCanonFiles_=[]
STREAM: shipped: false /usr/local/hadoop/file1
STREAM: cmd=file1
STREAM: cmd=null
STREAM: shipped: false /usr/local/hadoop/org.apache.hadoop.mapred.lib.IdentityReducer
STREAM: cmd=org.apache.hadoop.mapred.lib.IdentityReducer
11/04/13 13:22:15 WARN conf.Configuration: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
STREAM: Found runtime classes in: /usr/local/hadoop-hadoop/hadoop-unjar7358684340334149267/
packageJobJar: [/usr/local/hadoop-hadoop/hadoop-unjar7358684340334149267/] [] /tmp/streamjob2923554781371902680.jar tmpDir=null
JarBuilder.addNamedStream META-INF/MANIFEST.MF
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesWritable.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesRecordOutput$1.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesWritableOutput$1.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesRecordOutput.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesOutput.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesOutput$1.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesInput$1.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesWritableOutput.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesRecordInput$TypedBytesIndex.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesWritableInput$2.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesWritableInput.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesRecordInput.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/Type.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesWritableInput$1.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesRecordInput$1.class
JarBuilder.addNamedStream org/apache/hadoop/typedbytes/TypedBytesInput.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamUtil$TaskId.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeMapRed$1.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamJob.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamUtil.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/Environment.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/RawBytesOutputReader.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/TypedBytesInputWriter.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/TextInputWriter.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/InputWriter.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/TextOutputReader.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/IdentifierResolver.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/RawBytesInputWriter.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/TypedBytesOutputReader.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/io/OutputReader.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeMapRed.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PathFinder.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/LoadTypedBytes.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamXmlRecordReader.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/UTF8ByteArrayUtils.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/JarBuilder.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamUtil$StreamConsumer.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeMapRed$MRErrorThread.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamKeyValUtil.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeCombiner.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeReducer.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamInputFormat.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeMapRunner.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeMapRed$MROutputThread.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/HadoopStreaming.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/DumpTypedBytes.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/AutoInputFormat.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/PipeMapper.class
JarBuilder.addNamedStream org/apache/hadoop/streaming/StreamBaseRecordReader.class
STREAM: ==== JobConf properties:
STREAM: dfs.block.access.key.update.interval=600
STREAM: dfs.block.access.token.enable=false
STREAM: dfs.block.access.token.lifetime=600
STREAM: dfs.blockreport.initialDelay=0
STREAM: dfs.blockreport.intervalMsec=21600000
STREAM: dfs.blocksize=67108864
STREAM: dfs.bytes-per-checksum=512
STREAM: dfs.client-write-packet-size=65536
STREAM: dfs.client.block.write.retries=3
STREAM: dfs.client.https.keystore.resource=ssl-client.xml
STREAM: dfs.client.https.need-auth=false
STREAM: dfs.datanode.address=0.0.0.0:50010
STREAM: dfs.datanode.balance.bandwidthPerSec=1048576
STREAM: dfs.datanode.data.dir=file://${hadoop.tmp.dir}/dfs/data
STREAM: dfs.datanode.data.dir.perm=755
STREAM: dfs.datanode.directoryscan.interval=21600
STREAM: dfs.datanode.directoryscan.threads=1
STREAM: dfs.datanode.dns.interface=default
STREAM: dfs.datanode.dns.nameserver=default
STREAM: dfs.datanode.du.reserved=0
STREAM: dfs.datanode.failed.volumes.tolerated=0
STREAM: dfs.datanode.handler.count=3
STREAM: dfs.datanode.http.address=0.0.0.0:50075
STREAM: dfs.datanode.https.address=0.0.0.0:50475
STREAM: dfs.datanode.ipc.address=0.0.0.0:50020
STREAM: dfs.default.chunk.view.size=32768
STREAM: dfs.heartbeat.interval=3
STREAM: dfs.https.enable=false
STREAM: dfs.https.server.keystore.resource=ssl-server.xml
STREAM: dfs.namenode.accesstime.precision=3600000
STREAM: dfs.namenode.backup.address=0.0.0.0:50100
STREAM: dfs.namenode.backup.http-address=0.0.0.0:50105
STREAM: dfs.namenode.checkpoint.dir=file://${hadoop.tmp.dir}/dfs/namesecondary
STREAM: dfs.namenode.checkpoint.edits.dir=${dfs.namenode.checkpoint.dir}
STREAM: dfs.namenode.checkpoint.period=3600
STREAM: dfs.namenode.checkpoint.size=67108864
STREAM: dfs.namenode.decommission.interval=30
STREAM: dfs.namenode.decommission.nodes.per.interval=5
STREAM: dfs.namenode.delegation.key.update-interval=86400
STREAM: dfs.namenode.delegation.token.max-lifetime=604800
STREAM: dfs.namenode.delegation.token.renew-interval=86400
STREAM: dfs.namenode.edits.dir=${dfs.namenode.name.dir}
STREAM: dfs.namenode.handler.count=10
STREAM: dfs.namenode.http-address=0.0.0.0:50070
STREAM: dfs.namenode.https-address=0.0.0.0:50470
STREAM: dfs.namenode.logging.level=info
STREAM: dfs.namenode.max.objects=0
STREAM: dfs.namenode.name.dir=file://${hadoop.tmp.dir}/dfs/name
STREAM: dfs.namenode.replication.considerLoad=true
STREAM: dfs.namenode.replication.interval=3
STREAM: dfs.namenode.replication.min=1
STREAM: dfs.namenode.safemode.extension=30000
STREAM: dfs.namenode.safemode.threshold-pct=0.999f
STREAM: dfs.namenode.secondary.http-address=0.0.0.0:50090
STREAM: dfs.permissions.enabled=true
STREAM: dfs.permissions.superusergroup=supergroup
STREAM: dfs.replication=1
STREAM: dfs.replication.max=512
STREAM: dfs.stream-buffer-size=4096
STREAM: dfs.web.ugi=webuser,webgroup
STREAM: file.blocksize=67108864
STREAM: file.bytes-per-checksum=512
STREAM: file.client-write-packet-size=65536
STREAM: file.replication=1
STREAM: file.stream-buffer-size=4096
STREAM: fs.AbstractFileSystem.file.impl=org.apache.hadoop.fs.local.LocalFs
STREAM: fs.AbstractFileSystem.hdfs.impl=org.apache.hadoop.fs.Hdfs
STREAM: fs.automatic.close=true
STREAM: fs.checkpoint.dir=${hadoop.tmp.dir}/dfs/namesecondary
STREAM: fs.checkpoint.edits.dir=${fs.checkpoint.dir}
STREAM: fs.checkpoint.period=3600
STREAM: fs.checkpoint.size=67108864
STREAM: fs.defaultFS=hdfs://localhost:54310
STREAM: fs.df.interval=60000
STREAM: fs.file.impl=org.apache.hadoop.fs.LocalFileSystem
STREAM: fs.ftp.impl=org.apache.hadoop.fs.ftp.FTPFileSystem
STREAM: fs.har.impl=org.apache.hadoop.fs.HarFileSystem
STREAM: fs.har.impl.disable.cache=true
STREAM: fs.hdfs.impl=org.apache.hadoop.hdfs.DistributedFileSystem
STREAM: fs.hftp.impl=org.apache.hadoop.hdfs.HftpFileSystem
STREAM: fs.hsftp.impl=org.apache.hadoop.hdfs.HsftpFileSystem
STREAM: fs.kfs.impl=org.apache.hadoop.fs.kfs.KosmosFileSystem
STREAM: fs.ramfs.impl=org.apache.hadoop.fs.InMemoryFileSystem
STREAM: fs.s3.block.size=67108864
STREAM: fs.s3.buffer.dir=${hadoop.tmp.dir}/s3
STREAM: fs.s3.impl=org.apache.hadoop.fs.s3.S3FileSystem
STREAM: fs.s3.maxRetries=4
STREAM: fs.s3.sleepTimeSeconds=10
STREAM: fs.s3n.block.size=67108864
STREAM: fs.s3n.impl=org.apache.hadoop.fs.s3native.NativeS3FileSystem
STREAM: fs.trash.interval=0
STREAM: ftp.blocksize=67108864
STREAM: ftp.bytes-per-checksum=512
STREAM: ftp.client-write-packet-size=65536
STREAM: ftp.replication=3
STREAM: ftp.stream-buffer-size=4096
STREAM: hadoop.common.configuration.version=0.21.0
STREAM: hadoop.hdfs.configuration.version=1
STREAM: hadoop.logfile.count=10
STREAM: hadoop.logfile.size=10000000
STREAM: hadoop.rpc.socket.factory.class.default=org.apache.hadoop.net.StandardSocketFactory
STREAM: hadoop.security.authentication=simple
STREAM: hadoop.security.authorization=false
STREAM: hadoop.tmp.dir=/usr/local/hadoop-${user.name}
STREAM: hadoop.util.hash.type=murmur
STREAM: io.bytes.per.checksum=512
STREAM: io.compression.codecs=org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec
STREAM: io.file.buffer.size=4096
STREAM: io.map.index.skip=0
STREAM: io.mapfile.bloom.error.rate=0.005
STREAM: io.mapfile.bloom.size=1048576
STREAM: io.native.lib.available=true
STREAM: io.seqfile.compress.blocksize=1000000
STREAM: io.seqfile.lazydecompress=true
STREAM: io.seqfile.local.dir=${hadoop.tmp.dir}/io/local
STREAM: io.seqfile.sorter.recordlimit=1000000
STREAM: io.serializations=org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization
STREAM: io.skip.checksum.errors=false
STREAM: ipc.client.connect.max.retries=10
STREAM: ipc.client.connection.maxidletime=10000
STREAM: ipc.client.idlethreshold=4000
STREAM: ipc.client.kill.max=10
STREAM: ipc.client.tcpnodelay=false
STREAM: ipc.server.listen.queue.size=128
STREAM: ipc.server.tcpnodelay=false
STREAM: kfs.blocksize=67108864
STREAM: kfs.bytes-per-checksum=512
STREAM: kfs.client-write-packet-size=65536
STREAM: kfs.replication=3
STREAM: kfs.stream-buffer-size=4096
STREAM: map.sort.class=org.apache.hadoop.util.QuickSort
STREAM: mapred.child.java.opts=-Xmx200m
STREAM: mapred.input.format.class=org.apache.hadoop.mapred.TextInputFormat
STREAM: mapred.map.runner.class=org.apache.hadoop.streaming.PipeMapRunner
STREAM: mapred.mapper.class=org.apache.hadoop.streaming.PipeMapper
STREAM: mapred.output.format.class=org.apache.hadoop.mapred.TextOutputFormat
STREAM: mapred.reducer.class=org.apache.hadoop.mapred.lib.IdentityReducer
STREAM: mapreduce.client.completion.pollinterval=5000
STREAM: mapreduce.client.genericoptionsparser.used=true
STREAM: mapreduce.client.output.filter=FAILED
STREAM: mapreduce.client.progressmonitor.pollinterval=1000
STREAM: mapreduce.client.submit.file.replication=10
STREAM: mapreduce.cluster.local.dir=${hadoop.tmp.dir}/mapred/local
STREAM: mapreduce.cluster.temp.dir=${hadoop.tmp.dir}/mapred/temp
STREAM: mapreduce.input.fileinputformat.inputdir=hdfs://localhost:54310/user/hadoop/datain/comparedata
STREAM: mapreduce.input.fileinputformat.split.minsize=0
STREAM: mapreduce.job.cache.symlink.create=yes
STREAM: mapreduce.job.committer.setup.cleanup.needed=true
STREAM: mapreduce.job.complete.cancel.delegation.tokens=true
STREAM: mapreduce.job.end-notification.retry.attempts=0
STREAM: mapreduce.job.end-notification.retry.interval=30000
STREAM: mapreduce.job.jar=/tmp/streamjob2923554781371902680.jar
STREAM: mapreduce.job.jvm.numtasks=1
STREAM: mapreduce.job.maps=2
STREAM: mapreduce.job.maxtaskfailures.per.tracker=4
STREAM: mapreduce.job.output.key.class=org.apache.hadoop.io.Text
STREAM: mapreduce.job.output.value.class=org.apache.hadoop.io.Text
STREAM: mapreduce.job.queuename=default
STREAM: mapreduce.job.reduce.slowstart.completedmaps=0.05
STREAM: mapreduce.job.reduces=1
STREAM: mapreduce.job.speculative.slownodethreshold=1.0
STREAM: mapreduce.job.speculative.slowtaskthreshold=1.0
STREAM: mapreduce.job.speculative.speculativecap=0.1
STREAM: mapreduce.job.split.metainfo.maxsize=10000000
STREAM: mapreduce.job.userlog.retain.hours=24
STREAM: mapreduce.job.working.dir=hdfs://localhost:54310/user/hadoop
STREAM: mapreduce.jobtracker.address=localhost:54311
STREAM: mapreduce.jobtracker.expire.trackers.interval=600000
STREAM: mapreduce.jobtracker.handler.count=10
STREAM: mapreduce.jobtracker.heartbeats.in.second=100
STREAM: mapreduce.jobtracker.http.address=0.0.0.0:50030
STREAM: mapreduce.jobtracker.instrumentation=org.apache.hadoop.mapred.JobTrackerMetricsInst
STREAM: mapreduce.jobtracker.jobhistory.block.size=3145728
STREAM: mapreduce.jobtracker.jobhistory.lru.cache.size=5
STREAM: mapreduce.jobtracker.maxtasks.perjob=-1
STREAM: mapreduce.jobtracker.persist.jobstatus.active=true
STREAM: mapreduce.jobtracker.persist.jobstatus.dir=/jobtracker/jobsInfo
STREAM: mapreduce.jobtracker.persist.jobstatus.hours=1
STREAM: mapreduce.jobtracker.restart.recover=false
STREAM: mapreduce.jobtracker.retiredjobs.cache.size=1000
STREAM: mapreduce.jobtracker.staging.root.dir=${hadoop.tmp.dir}/mapred/staging
STREAM: mapreduce.jobtracker.system.dir=${hadoop.tmp.dir}/mapred/system
STREAM: mapreduce.jobtracker.taskcache.levels=2
STREAM: mapreduce.jobtracker.taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler
STREAM: mapreduce.jobtracker.tasktracker.maxblacklists=4
STREAM: mapreduce.map.log.level=INFO
STREAM: mapreduce.map.maxattempts=4
STREAM: mapreduce.map.output.compress=false
STREAM: mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.DefaultCodec
STREAM: mapreduce.map.output.key.class=org.apache.hadoop.io.Text
STREAM: mapreduce.map.output.value.class=org.apache.hadoop.io.Text
STREAM: mapreduce.map.skip.maxrecords=0
STREAM: mapreduce.map.skip.proc.count.autoincr=true
STREAM: mapreduce.map.sort.spill.percent=0.80
STREAM: mapreduce.map.speculative=true
STREAM: mapreduce.output.fileoutputformat.compress=false
STREAM: mapreduce.output.fileoutputformat.compression.codec=org.apache.hadoop.io.compress.DefaultCodec
STREAM: mapreduce.output.fileoutputformat.compression.type=RECORD
STREAM: mapreduce.output.fileoutputformat.outputdir=hdfs://localhost:54310/user/hadoop/dataout5
STREAM: mapreduce.reduce.input.buffer.percent=0.0
STREAM: mapreduce.reduce.log.level=INFO
STREAM: mapreduce.reduce.markreset.buffer.percent=0.0
STREAM: mapreduce.reduce.maxattempts=4
STREAM: mapreduce.reduce.merge.inmem.threshold=1000
STREAM: mapreduce.reduce.shuffle.connect.timeout=180000
STREAM: mapreduce.reduce.shuffle.input.buffer.percent=0.70
STREAM: mapreduce.reduce.shuffle.merge.percent=0.66
STREAM: mapreduce.reduce.shuffle.parallelcopies=5
STREAM: mapreduce.reduce.shuffle.read.timeout=180000
STREAM: mapreduce.reduce.skip.maxgroups=0
STREAM: mapreduce.reduce.skip.proc.count.autoincr=true
STREAM: mapreduce.reduce.speculative=true
STREAM: mapreduce.task.files.preserve.failedtasks=false
STREAM: mapreduce.task.io.sort.factor=10
STREAM: mapreduce.task.io.sort.mb=100
STREAM: mapreduce.task.merge.progress.records=10000
STREAM: mapreduce.task.profile=false
STREAM: mapreduce.task.profile.maps=0-2
STREAM: mapreduce.task.profile.reduces=0-2
STREAM: mapreduce.task.skip.start.attempts=2
STREAM: mapreduce.task.timeout=600000
STREAM: mapreduce.task.tmp.dir=./tmp
STREAM: mapreduce.task.userlog.limit.kb=0
STREAM: mapreduce.tasktracker.cache.local.size=10737418240
STREAM: mapreduce.tasktracker.dns.interface=default
STREAM: mapreduce.tasktracker.dns.nameserver=default
STREAM: mapreduce.tasktracker.healthchecker.interval=60000
STREAM: mapreduce.tasktracker.healthchecker.script.timeout=600000
STREAM: mapreduce.tasktracker.http.address=0.0.0.0:50060
STREAM: mapreduce.tasktracker.http.threads=40
STREAM: mapreduce.tasktracker.indexcache.mb=10
STREAM: mapreduce.tasktracker.instrumentation=org.apache.hadoop.mapred.TaskTrackerMetricsInst
STREAM: mapreduce.tasktracker.local.dir.minspacekill=0
STREAM: mapreduce.tasktracker.local.dir.minspacestart=0
STREAM: mapreduce.tasktracker.map.tasks.maximum=2
STREAM: mapreduce.tasktracker.outofband.heartbeat=false
STREAM: mapreduce.tasktracker.reduce.tasks.maximum=2
STREAM: mapreduce.tasktracker.report.address=127.0.0.1:0
STREAM: mapreduce.tasktracker.taskcontroller=org.apache.hadoop.mapred.DefaultTaskController
STREAM: mapreduce.tasktracker.taskmemorymanager.monitoringinterval=5000
STREAM: mapreduce.tasktracker.tasks.sleeptimebeforesigkill=5000
STREAM: net.topology.node.switch.mapping.impl=org.apache.hadoop.net.ScriptBasedMapping
STREAM: net.topology.script.number.args=100
STREAM: s3.blocksize=67108864
STREAM: s3.bytes-per-checksum=512
STREAM: s3.client-write-packet-size=65536
STREAM: s3.replication=3
STREAM: s3.stream-buffer-size=4096
STREAM: s3native.blocksize=67108864
STREAM: s3native.bytes-per-checksum=512
STREAM: s3native.client-write-packet-size=65536
STREAM: s3native.replication=3
STREAM: s3native.stream-buffer-size=4096
STREAM: stream.addenvironment=
STREAM: stream.map.input.writer.class=org.apache.hadoop.streaming.io.TextInputWriter
STREAM: stream.map.output.reader.class=org.apache.hadoop.streaming.io.TextOutputReader
STREAM: stream.map.streamprocessor=file1
STREAM: stream.numinputspecs=1
STREAM: stream.reduce.input.writer.class=org.apache.hadoop.streaming.io.TextInputWriter
STREAM: stream.reduce.output.reader.class=org.apache.hadoop.streaming.io.TextOutputReader
STREAM: tmpfiles=file:/home/shivani/research/toolkit/mathouttuts/nearestneighbor/code/IdentityMapper.R#file1
STREAM: webinterface.private.actions=false
STREAM: ====
STREAM: submitting to jobconf: localhost:54311
11/04/13 13:22:17 INFO mapred.FileInputFormat: Total input paths to process : 1
11/04/13 13:22:17 WARN conf.Configuration: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
11/04/13 13:22:17 INFO mapreduce.JobSubmitter: number of splits:2
11/04/13 13:22:17 INFO mapreduce.JobSubmitter: adding the following namenodes' delegation tokens:null
11/04/13 13:22:17 INFO streaming.StreamJob: getLocalDirs(): [/usr/local/hadoop-hadoop/mapred/local]
11/04/13 13:22:17 INFO streaming.StreamJob: Running job: job_201104131251_0002
11/04/13 13:22:17 INFO streaming.StreamJob: To kill this job, run:
11/04/13 13:22:17 INFO streaming.StreamJob: /usr/local/hadoop/bin/hadoop job -Dmapreduce.jobtracker.address=localhost:54311 -kill job_201104131251_0002
11/04/13 13:22:17 INFO streaming.StreamJob: Tracking URL: http://localhost:50030/jobdetails.jsp?jobid=job_201104131251_0002
11/04/13 13:22:18 INFO streaming.StreamJob: map 0% reduce 0%
11/04/13 13:23:19 INFO streaming.StreamJob: map 100% reduce 100%
11/04/13 13:23:19 INFO streaming.StreamJob: To kill this job, run:
11/04/13 13:23:19 INFO streaming.StreamJob: /usr/local/hadoop/bin/hadoop job -Dmapreduce.jobtracker.address=localhost:54311 -kill job_201104131251_0002
11/04/13 13:23:19 INFO streaming.StreamJob: Tracking URL: http://localhost:50030/jobdetails.jsp?jobid=job_201104131251_0002
11/04/13 13:23:19 ERROR streaming.StreamJob: Job not Successful!
11/04/13 13:23:19 INFO streaming.StreamJob: killJob...
Streaming Command Failed!
I looked at the logs of the failed map task. The map attempt dies with the NullPointerException below, thrown from the String constructor while TextOutputReader.getLastOutput is building the failure message that PipeMapRed.logFailure tries to log:
java.lang.NullPointerException
    at java.lang.String.<init>(String.java:523)
    at org.apache.hadoop.streaming.io.TextOutputReader.getLastOutput(TextOutputReader.java:87)
    at org.apache.hadoop.streaming.PipeMapRed.getContext(PipeMapRed.java:616)
    at org.apache.hadoop.streaming.PipeMapRed.logFailure(PipeMapRed.java:643)
    at org.apache.hadoop.streaming.PipeMapper.map(PipeMapper.java:123)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
    at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:36)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:397)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:330)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:217)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:742)
    at org.apache.hadoop.mapred.Child.main(Child.java:211)