A big shout-out to those who wrote this awesome tool, which takes in code and formats it as HTML so that you can paste it into your blog:
http://codeformatter.blogspot.com/
http://codeformatter.blogspot.com/
import org.specs2.Specification
import org.specs2.mock.Mockito
import org.apache.spark.SparkContext
/** Mix-in for specs2 specifications that need a local `SparkContext`.
  *
  * Wrap each test body in [[runTest]]; a fresh context is created before the
  * body runs and is stopped afterwards, whether the body succeeds or throws.
  */
trait SparkTests extends Specification {

  /** Context of the test currently running inside [[runTest]].
    *
    * It is assigned just before the test body is evaluated and reset to
    * `null` once the test has finished, so it must only be read from within
    * the `body` passed to [[runTest]].
    */
  var sc: SparkContext = _

  /** Runs `body` with a freshly created local `SparkContext` available as `sc`.
    *
    * @param name used as the Spark application name and printed before the body runs
    * @param body the test code; evaluated exactly once while `sc` is live
    * @tparam A    result type of the test body
    * @return whatever `body` evaluates to
    */
  def runTest[A](name: String)(body: => A): A = {
    clearSparkNetworkProperties()
    sc = new SparkContext("local[4]", name)
    try {
      println(s"Running test $name")
      body
    } finally {
      // Nested try/finally: even if stop() itself throws, the properties are
      // still cleared and `sc` is still reset, so no state from this test
      // leaks into the next one.
      try sc.stop()
      finally {
        clearSparkNetworkProperties()
        sc = null
      }
    }
  }

  /** Clears the `spark.driver.port` and `spark.hostPort` system properties.
    *
    * NOTE(review): presumably this keeps a new context from reusing the
    * network settings recorded by a previous one — confirm against the Spark
    * version in use.
    */
  private def clearSparkNetworkProperties(): Unit = {
    System.clearProperty("spark.driver.port")
    System.clearProperty("spark.hostPort")
  }
}
class LogPreprocessorSpec extends Specification with Mockito with ScalaCheck with SparkTests {
sequential
testOptions in Test += Tests.Argument("sequential")
libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.1"
libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "your-hadoop-version"
Just making a quick reference to the rsync manual in order to synchronize directories.
The command used to copy folders is as follows:
rsync -avz foo:src/bar /data/tmp
This command copies the directory bar into /data/tmp,
meaning that at the end of this command you will have a /data/tmp/bar folder.
If you just want to sync the contents of the folders, use a trailing slash on the source, like this:
rsync -avz foo:src/bar/ /data/tmp
Now only the contents of bar will be copied into /data/tmp folder;
you will not find a folder called bar in /data/tmp