Commit ee4d73ee authored by Martin Perdacher's avatar Martin Perdacher

Initial commit

parents
.DS_Store
# Development image for the Spark introduction: Python 3 with PySpark, a Java 11
# JDK and sbt for Scala/Java projects.
#
# The base is pinned to a named Debian release instead of the moving "latest"
# tag: bullseye still ships the openjdk-11 packages installed below, which newer
# Debian releases no longer provide.
FROM debian:bullseye
LABEL maintainer="Martin Perdacher <martin.perdacher@univie.ac.at>" \
      lecture="Scientific Data Management (SDM), Data Mining (DM)" \
      description="Introduction into Spark"

# OS tooling, Python 3 with pip, and the Java runtime/JDK that Spark requires.
# "update", "install" and the apt list cleanup share one layer so that a cached
# package index is never reused for a later install and the index does not end
# up in the image. The Python 2 "python-pip" package is not installed because
# it does not exist in this release.
RUN apt-get update -y --fix-missing \
    && apt-get install -y \
        apt-transport-https \
        build-essential \
        bzip2 \
        ca-certificates \
        curl \
        git \
        gnupg \
        gnupg2 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender1 \
        mercurial \
        openjdk-11-demo \
        openjdk-11-doc \
        openjdk-11-jdk \
        openjdk-11-jre \
        openjdk-11-jre-headless \
        openjdk-11-source \
        python3-pip \
        subversion \
        wget \
    && rm -rf /var/lib/apt/lists/*

# PySpark (provides spark-submit) for the Python 3 interpreter.
RUN python3 -m pip install --no-cache-dir pyspark

# JAVA_HOME must be the JDK installation directory, not the java executable:
# Spark's launcher scripts run "$JAVA_HOME/bin/java".
# NOTE(review): the path is specific to amd64 images.
ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64

# SBT for the development of code in scala or java.
# Download, install and delete the package in a single layer so the .deb file
# is not kept in the image; -f makes curl fail on an HTTP error instead of
# saving the error page. The former Bintray download host has been shut down.
RUN curl -fL -o sbt-1.3.4.deb https://repo.scala-sbt.org/scalasbt/debian/sbt-1.3.4.deb \
    && dpkg -i sbt-1.3.4.deb \
    && rm sbt-1.3.4.deb

# Copy the build context into the image's working directory (no WORKDIR is set
# in this file, so the base image's default applies).
COPY . .
# Helloworld example using Apache spark
### Install prerequisites
- Docker available from [docker-url](https://www.docker.com/products/docker-desktop)
### Running the example
Run Spark locally with a single worker thread (use `--master "local[*]"` to run with as many worker threads as there are cores available)
```{bash}
spark-submit --master local helloworld.py
```
Run Spark locally with 2 cores
```{bash}
spark-submit --master "local[2]" helloworld.py
```
from pyspark.sql import SparkSession
# Configure a session builder for the application named "SimpleApp", then
# obtain the session from it.
session_builder = SparkSession.builder.appName("SimpleApp")
spark = session_builder.getOrCreate()
# -> your Spark code goes here
# Shut the session down once the job has finished.
spark.stop()
from pyspark import SparkContext, SparkConf
# Configure and start the SparkContext for the application named "SimpleApp".
conf = SparkConf().setAppName("SimpleApp")
sc = SparkContext(conf=conf)
# -> your Spark code goes here
# stop has to be called: a bare `sc.stop` only references the bound method and
# leaves the context running.
sc.stop()
#!/usr/bin/env python
from distutils.core import setup
# Package metadata for the hellospark script.
setup(name='hellospark',
      version='1.0',
      description='hellospark script in python',
      author='Martin Perdacher',
      author_email='martin.perdacher@univie.ac.at',
      # Dependencies only take effect when passed to setup(); a module-level
      # `install_requires` variable is never read by the packaging tools.
      # NOTE(review): the requirement used to be pinned through an unrendered
      # '{site.SPARK_VERSION}' template placeholder, which is not a valid
      # version specifier. Pin a concrete pyspark version here once the target
      # Spark version is decided.
      # NOTE(review): plain distutils only warns about this option; it is
      # presumably honoured when the package is installed through pip or
      # setuptools - confirm.
      install_requires=[
          'pyspark',
      ],
      )
// Project coordinates ("YOUR_GROUP" is a placeholder project name).
name := "YOUR_GROUP"
version := "0.1"
// Spark 2.4.x is published for Scala 2.11 and 2.12 only. With a 2.13 Scala
// version the %% dependencies below would look for spark-core_2.13 /
// spark-mllib_2.13 at version 2.4.5, which do not exist.
scalaVersion := "2.12.10"
// Spark core and MLlib, both at the same Spark version; sources are fetched
// for spark-core as well.
libraryDependencies ++= {
  val sparkVer = "2.4.5"
  Seq(
    "org.apache.spark" %% "spark-core" % sparkVer withSources(),
    "org.apache.spark" %% "spark-mllib" % sparkVer
  )
}
// mainClass := Some("KMeansClustering")
// Conflict resolution for files that occur in more than one jar when the
// assembly is built: everything below META-INF is dropped, and for any other
// path the first occurrence is kept.
assemblyMergeStrategy in assembly := {
  case PathList("META-INF", _*) =>
    MergeStrategy.discard
  case _ =>
    MergeStrategy.first
}
// https://stackoverflow.com/a/28498443/841052
// https://github.com/sbt/sbt-assembly
// Resolve sbt plugins from the sbt community plugin repository. Bintray
// (dl.bintray.com) has been shut down, so Bintray-hosted resolvers can no
// longer serve the plugin.
resolvers += Resolver.sbtPluginRepo("releases")
// sbt-assembly supplies the `assembly` task and the assemblyMergeStrategy
// setting used by the build definition.
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
package at.ac.univie.spark;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.SparkConf;
/**
 * Minimal Spark driver: starts a Spark context, prints a greeting and shuts
 * the context down again. Serves as a skeleton for custom Spark jobs.
 */
class RunSpark {
    /**
     * Entry point of the driver.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // on AWS:
        // SparkConf conf = new SparkConf().setAppName("GRUPPEXX");
        // local environment (laptop/PC)
        SparkConf conf = new SparkConf().setAppName("GRUPPEXX").setMaster("local[*]");

        // Placeholders referenced only by the commented-out S3 example below.
        // Do not commit real credentials to version control.
        String AWS_ACCESS_KEY_ID = "";
        String AWS_SECRET_ACCESS_KEY = "";

        // JavaSparkContext is Closeable; try-with-resources stops the context
        // even when the job code throws, instead of relying on a trailing
        // sc.stop() that would be skipped on an exception.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            System.out.println(">>>>>>>>>>>>>>>>>> Hello from Spark! <<<<<<<<<<<<<<<<<<<<<<<<<");
            // ...
            // example accessing S3:
            /*
            clusterMembers.saveAsTextFile("s3n://" + AWS_ACCESS_KEY_ID + ":" + AWS_SECRET_ACCESS_KEY + "@qltrail-lab-265-1488270472/result");
            */
        }
    }
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
208
315
642
1075
1271
1534
1732
1808
2143
2384
2938
3428
3969
4423
4532
5699
5985
6398
6954
7138
8400
9041
9108
9239
9432
9667
9797
11503
11688
11731
12274
13618
14394
15280
15332
16571
17010
17368
17453
17680
18030
18161
18629
19514
19825
19913
20068
21266
21505
22299
23174
23654
25121
25577
25766
26059
26553
27110
27356
28342
28522
28964
29861
30678
30949
31104
31567
31647
31935
32179
32972
33204
33421
33722
34298
34486
35087
35389
36239
37405
37443
38046
40009
40467
41048
41317
44271
45010
45077
46776
47378
47895
47898
48021
48133
48505
48717
49946
49994
51420
51461
51764
51853
52014
52379
52958
53427
54289
54529
54596
55161
55447
55666
55756
55922
56000
56312
56466
57360
57721
58348
60614
61215
61423
62246
62289
62564
62616
62812
63369
64227
64582
64970
65525
65985
67028
67098
67186
67326
67331
68212
68667
68775
69320
71345
71656
72902
73079
73370
73523
74179
74541
74858
75409
75691
76415
76589
76684
76912
77855
77904
78636
78966
79668
79722
79926
79955
81066
81520
82064
82378
83815
84705
85394
87370
88853
89472
90309
90568
90601
91219
91431
91653
92918
93181
93239
93942
95150
96127
97440
97565
97992
98154
98687
98856
99415
99463
101272
101287
101335
101438
101667
101691
101886
101932
102650
102885
103715
105160
105545
105953
106151
106793
107028
108091
108467
109577
109592
110478
112055
113040
113209
113327
113360
113368
113528
114043
114285
114860
114923
115700
116130
117505
117692
118416
118720
118899
118949
118973
121489
121495
122947
123133
123267
123396
123474
123844
124167
124265
124579
124806
124830
124950
126224
126500
127323
127574
127796
127832
127864
128083
129288
129662
130408
130717
130758
131379
131552
131624
132718
133159
135527
135620
135800
136406
136641
136883
138304
138548
138875
139095
139652
141799
142005
142021
142072
142290
142853
144027
144378
145843
145948
146103
146792
147886
148278
148938
149513
149690
149962
150362
150385
150429
150446
152340
152498
152896
153973
154024
154569
155264
155788
156212
158282
159186
159815
160183
161601
161649
161729
162234
162673
163602
163926
163949
164070
164164
165938
167250
167862
167931
168448
170545
171486
171660
172023
172170
172234
172313
173180
173467
173655
174731
174762
176076
176858
177561
178605
179455
179850
180273
180651
180739
180930
181006
181007
181406
181653
182111
182616
182697
183283
184007
184527
185078
185917
186413
186803
186883
187871
189467
189611
190236
190280
190638
191481
192353
192976
194530
194785
196439
196528
196885
197232
198263
199412
199606
200394
200632
200663
200901
201738
202954
204106
204346
205389
206269
206376
206519
206627
207481
207648
208027
208198
209123
209201
209270
209533
211809
212025
212410
212888
213152
213301
213325
213803
213809
213893
214028
214110
214765
214830
214836
214990
215068
215833
215906
216462
216495
217757
218185
218440
218469
219193
221492
222714
224280
224969
225075
226980
227349
228412
228419
228707
229876
229884
230141
230439
230452
231121
231854
232745
233024
233591
233955
233978
234015
234088
234124
234429
234542
235074
239146
239725
240325
241089
242383
242509
243099
243125
243932
244713
245208
246436
247539
247679
248119
249364
249821
250613
250973
251593
252215
252336
253120
254025
254185
254244
254656
255147
255848
256645
256783
257727
257878
258350
258600
259280
259669
260409
261679
261997
262250
262419
263360
263872
264654
264744
265257
266073
266306
266702
266781
267992
268217
268304
269520
271716
271945
273358
273509
274088
274161
274310