8
nano wordcount_sim.py
# Sample input for the word-count simulation; it is split on newlines and
# each line is passed to mapper() further down.
text = """ Hadoop MapReduce is a software framework for easily writing applications which
process vast amounts """
def mapper(line):
    """Emit a ``(word, 1)`` pair for every whitespace-separated word in *line*.

    Args:
        line: One line of input text.

    Returns:
        A list of ``(word, 1)`` tuples in order of appearance. The list is
        empty when the line is blank or contains only whitespace.
    """
    # str.split() with no separator already ignores leading/trailing
    # whitespace and collapses runs of whitespace, so no strip() is needed.
    return [(word, 1) for word in line.split()]
from collections import defaultdict
# Map phase: run the mapper over every line of the input text and collect
# all emitted (word, 1) pairs into one flat list.
mapped = []
for line in text.strip().split('\n'):
    mapped.extend(mapper(line))

# Shuffle and sort phase: group the emitted counts by word.
# NOTE: no explicit sort is performed here; keys end up in first-seen order.
shuffle_sort = defaultdict(list)
for word, count in mapped:
    shuffle_sort[word].append(count)
# Reduce phase
def reducer(shuffled_data):
    """Sum the list of counts collected for each word.

    Args:
        shuffled_data: Mapping of word -> iterable of integer counts.

    Returns:
        A new plain ``dict`` mapping each word to the sum of its counts,
        in the same key order as *shuffled_data*. An empty mapping yields
        an empty dict.
    """
    return {word: sum(counts) for word, counts in shuffled_data.items()}
# Collapse the grouped counts into one total per word.
word_counts = reducer(shuffle_sort)

# Output: one "word<TAB>count" line per distinct word.
for word, count in word_counts.items():
    print(f"{word}\t{count}")
Press Ctrl+O, then Enter, to save the file in nano.
python3 wordcount_sim.py
(or) python wordcount_sim.py
Comments
Post a Comment