8

nano wordcount_sim.py

# Sample input for the simulated word-count job. The map phase below splits
# this string on newlines and feeds each resulting line to mapper().
text = """ Hadoop MapReduce is a software framework for easily writing applications which

process vast amounts """

def mapper(line):
    """Emit a ``(word, 1)`` pair for every whitespace-separated word in *line*.

    Args:
        line: One line of input text.

    Returns:
        A list of ``(word, 1)`` tuples in the order the words appear.
        An empty or whitespace-only line yields an empty list.
    """
    # str.split() with no separator already ignores leading/trailing
    # whitespace, so no explicit strip() is needed.
    return [(word, 1) for word in line.split()]

from collections import defaultdict

# Map phase: run the mapper over every newline-separated line of the input
# and collect all emitted pairs in one flat list.
mapped = []
for line in text.strip().split('\n'):
    mapped.extend(mapper(line))

# Shuffle and sort phase: group the emitted counts by word, so each key maps
# to the list of counts produced for it.
# NOTE: this simulation only groups by key; no sorting is performed here.
shuffle_sort = defaultdict(list)
for word, count in mapped:
    shuffle_sort[word].append(count)

# Reduce phase

def reducer(shuffled_data):
    """Collapse each word's list of counts into a single total.

    Args:
        shuffled_data: Mapping of word -> iterable of numeric counts.

    Returns:
        A new dict mapping each word to the sum of its counts, with keys in
        the same iteration order as *shuffled_data*. An empty mapping
        yields an empty dict.
    """
    return {word: sum(counts) for word, counts in shuffled_data.items()}

word_counts = reducer(shuffle_sort)

# Output: print one "word<TAB>count" line per distinct word.
for word, count in word_counts.items():
    print(f"{word}\t{count}")

Ctrl+O, then Enter (save the file in nano); Ctrl+X to exit

python3 wordcount_sim.py

(or) python wordcount_sim.py

alfaaz