8

Create the script file in the nano editor:

 nano wordcount_sim.py

# Sample input for the simulation: a two-line string. The map phase strips
# it, splits it on newlines, and feeds each line to the mapper.
text = """ Hadoop MapReduce is a software framework for easily writing applications which
process vast amounts """

def mapper(line):
    """Emit one ``(word, 1)`` pair per whitespace-separated word in *line*.

    Words are taken verbatim (case and punctuation are kept), and pairs are
    returned in the order the words appear. A blank line yields an empty list.
    """
    pairs = []
    for token in line.strip().split():
        pairs.append((token, 1))
    return pairs
from collections import defaultdict
# Map phase: run the mapper over every input line and flatten the
# resulting (word, 1) pairs into a single list.
input_lines = text.strip().split('\n')
mapped = [pair for line in input_lines for pair in mapper(line)]
# Shuffle and sort phase: group the emitted counts under their word.
# Keys stay in first-seen order; no explicit sort is applied here.
shuffle_sort = defaultdict(list)
for word, count in mapped:
    shuffle_sort[word].append(count)
# Reduce phase
def reducer(shuffled_data):
    """Collapse each word's list of counts into a single total.

    *shuffled_data* maps a word to the list of counts emitted for it. The
    result is a plain dict mapping each word to the sum of its list, with
    keys in the same order as the input mapping.
    """
    return {key: sum(values) for key, values in shuffled_data.items()}
# Run the reduce step over the grouped counts to get one total per word.
word_counts = reducer(shuffle_sort)
# Output: print each word and its total as one tab-separated line.
for word, count in word_counts.items():
    print(f"{word}\t{count}")


Press Ctrl+O (then Enter) to save the file in nano, then run it with:
python3 wordcount_sim.py
(or) python wordcount_sim.py

Comments

Popular posts from this blog

7

6