Я пытаюсь найти минимум набора данных, и мне помог этот пример. Код дает мне максимум. Я не могу найти, что изменить, чтобы заставить его найти минимум.
from mrjob.job import MRJob
from mrjob.step import MRStep
class HighestRevenue(MRJob):
    """Two-step MapReduce job that reports the city with the largest total amount.

    Step 1 sums the amounts per city. Step 2 receives every per-city total
    under a single key and emits only the pair with the highest total.
    """

    def mapper_get_city(self, key, line):
        """Emit ``(city, amount)`` for one comma-separated input line.

        The city is taken from the first column and the amount, converted to
        ``float``, from the third column. ``key`` is not used.
        """
        line_cols = line.split(',')
        yield line_cols[0], float(line_cols[2])

    def combiner_process_city(self, city, amount):
        """Pre-aggregate the mapper output for one city (runs at mapper nodes).

        ``amount`` is an iterable of the amounts emitted for ``city``; a
        single partial sum is emitted in their place.
        """
        yield city, sum(amount)

    def reducer_city_amount(self, city, amount):
        """Emit the final total for one city as ``(None, (city, total))``.

        ``amount`` is an iterable of (partial) sums for ``city``.
        """
        # A constant key (None) sends every (city, total) pair to the same
        # reducer call in the next step, so they can be compared there.
        yield None, (city, sum(amount))

    def secondReducer(self, city, amount):
        """Emit the single ``(city, total)`` pair with the highest total.

        Despite the parameter names, ``city`` is the constant ``None`` key
        produced by the previous step and ``amount`` is an iterable of
        ``(city, total)`` pairs. Nothing is emitted when ``amount`` is empty.
        On ties, the first pair encountered wins.
        """
        # One pass with max() replaces collecting and sorting every pair.
        # Use min() here instead to report the lowest total.
        best = max(amount, key=lambda pair: pair[1], default=None)
        if best is None:
            return
        top_city, top_total = best
        yield top_city, top_total

    def steps(self):
        """Return the two steps of the job in execution order."""
        return [
            MRStep(mapper=self.mapper_get_city,
                   combiner=self.combiner_process_city,
                   reducer=self.reducer_city_amount),
            MRStep(reducer=self.secondReducer),
        ]