-
Notifications
You must be signed in to change notification settings - Fork 1
/
generateTestOutputs.py
executable file
·35 lines (27 loc) · 1.1 KB
/
generateTestOutputs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sys
from operator import add
from pyspark import SparkContext
# Test 1 WordCount
# The context created here is reused by the later tests through the name `sc`.
sc = SparkContext("local[*]", "Test")
lines = sc.textFile("./Input_files/wordCount.txt", 1)
# Split every line on single spaces and discard the empty tokens that
# consecutive spaces or blank lines produce.
words = lines.flatMap(lambda line: line.split(' '))
non_empty_words = words.filter(lambda word: word != '')
# Pair each word with 1 and sum the ones per distinct word.
word_totals = non_empty_words.map(lambda word: (word, 1)).reduceByKey(add)
# NOTE: `counts` holds whatever saveAsTextFile returns (None in PySpark), not
# the word totals themselves; the totals live in the written output directory.
counts = word_totals.saveAsTextFile("./spark_test_outputs/wordCount_testOut")
# Test 2 getAverageStockPrice
# Computes the mean of the float in field 2 of every space-separated record,
# grouped by the string in field 1 (presumably price per stock symbol, going
# by the test name -- confirm against the input file).
# A record with fewer than three fields raises IndexError, and a non-numeric
# field 2 raises ValueError, when the job runs.
lines = sc.textFile("./Input_files/getAverageStockPrice.txt", 1)
# Split each record once and reuse the pieces instead of splitting twice.
fields = lines.map(lambda x: x.split(' '))
# Carry (running sum, running count) per key so the mean can be derived after
# a single reduce.
sumsAndCounts = fields.map(lambda parts: (parts[1], (float(parts[2]), 1)))\
    .reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1]))
# Bind the name to the averages RDD itself rather than to the None that
# saveAsTextFile returns.
averageStockPrice = sumsAndCounts.mapValues(lambda v: v[0] / v[1])
averageStockPrice.saveAsTextFile("./spark_test_outputs/getAverageStockPrice_testOut")
# Test 3 searchWord
# Writes a single (searchWord, "True"/"False") record telling whether the
# search word occurs as a space-separated token anywhere in the input.
lines = sc.textFile("./Input_files/searchWord.txt", 1)
searchWord = "searchMe"
# Every non-empty token is mapped onto the same key with a 1 (match) or
# 0 (no match); summing per key yields the number of matches.
# NOTE: if the input contains no tokens at all there is nothing to reduce, so
# no record (not even "False") is produced.
matchCount = lines.flatMap(lambda x: x.split(' '))\
    .filter(lambda x: x != '')\
    .map(lambda x: (searchWord, 1 if x == searchWord else 0))\
    .reduceByKey(add)
# Named for what it holds; the original reused `averageStockPrice` here.
searchWordFound = matchCount.mapValues(lambda v: "True" if v > 0 else "False")
searchWordFound.saveAsTextFile("./spark_test_outputs/searchWord_testOut")
# Last job of the script: shut the context down explicitly instead of relying
# on interpreter exit to release it.
sc.stop()