1、基础环境
2、spark环境
# Download and unpack the Spark 2.2.1 / Hadoop 2.7 binary distribution into $HOME.
cd ~
# Use HTTPS — fetching a binary distribution over plain HTTP is open to tampering.
wget https://archive.apache.org/dist/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz
tar -xzvf spark-2.2.1-bin-hadoop2.7.tgz
# Make SPARK_HOME available system-wide: append the export line below to /etc/profile.
sudo vim /etc/profile
# add SPARK_HOME (this export line is the content to add inside /etc/profile)
export SPARK_HOME=/home/carbondata/spark-2.2.1-bin-hadoop2.7
# Reload the profile so SPARK_HOME takes effect in the current shell session.
source /etc/profile
# Create the carbonlib directory inside the Spark distribution and copy in the
# CarbonData assembly jar plus the carbon.properties template.
# mkdir -p is idempotent, and ~-anchored paths do not depend on the current directory.
mkdir -p ~/spark-2.2.1-bin-hadoop2.7/carbonlib
cp ~/carbondata/assembly/target/scala-2.11/apache-carbondata-1.6.0-SNAPSHOT-bin-spark2.2.1-hadoop2.7.2.jar ~/spark-2.2.1-bin-hadoop2.7/carbonlib/
cp ~/carbondata/conf/carbon.properties.template ~/spark-2.2.1-bin-hadoop2.7/conf/
# Package carbonlib for spark.yarn.dist.archives and activate the config templates.
# Quote SPARK_HOME and abort if the cd fails — otherwise the mv commands below
# would run against whatever the current directory happens to be.
cd "$SPARK_HOME" || exit 1
tar -zcvf carbondata.tar.gz carbonlib/
# The archive is shipped to YARN from inside carbonlib/ (see spark.yarn.dist.archives).
mv carbondata.tar.gz carbonlib/
mv ./conf/carbon.properties.template ./conf/carbon.properties
mv ./conf/spark-defaults.conf.template ./conf/spark-defaults.conf
mv ./conf/spark-env.sh.template ./conf/spark-env.sh
# Copy the S3A support jars from Hadoop's tools directory into Spark's jar dir
# so Spark can read/write s3a:// paths.
for jar in aws-java-sdk-1.7.4.jar hadoop-aws-2.7.2.jar; do
  cp ~/hadoop-2.7.2/share/hadoop/tools/lib/"$jar" ~/spark-2.2.1-bin-hadoop2.7/jars/
done
3、配置carbondata和spark
1) carbon.properties
在carbon.properties中增加配置store location
carbon.storelocation=s3a://demo20190203/carbon/data/store
2) spark-defaults.conf
在spark-defaults.conf中增加如下配置
# carbon.properties was moved into $SPARK_HOME/conf/ in the previous step, so the
# filepath must include the conf/ directory.
spark.driver.extraJavaOptions -Dcarbon.properties.filepath=$SPARK_HOME/conf/carbon.properties
spark.executor.extraJavaOptions -Dcarbon.properties.filepath=$SPARK_HOME/conf/carbon.properties
# "yarn-client" is deprecated since Spark 2.0; use master=yarn with client deploy mode.
spark.master yarn
spark.submit.deployMode client
spark.yarn.dist.files conf/carbon.properties
spark.yarn.dist.archives carbonlib/carbondata.tar.gz
spark.executor.extraClassPath carbondata.tar.gz/carbonlib/*
spark.driver.extraClassPath $SPARK_HOME/carbonlib/*
3) spark-env.sh
在spark-env.sh中增加如下配置
# Define HADOOP_HOME first so the classpath below can reuse it instead of
# duplicating the hard-coded Hadoop install path.
export HADOOP_HOME=/home/carbondata/hadoop-2.7.2
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Add the CarbonData jars and Hadoop tools jars (S3A support) to Spark's classpath.
export SPARK_CLASSPATH=$SPARK_CLASSPATH:$SPARK_HOME/carbonlib/*:$HADOOP_HOME/share/hadoop/tools/lib/*
- 创建hdfs carbondata临时目录
# Create the CarbonData temporary directory on HDFS.
# -p creates parents and does not fail when /tmp already exists (it usually does
# on a live cluster, which made the original plain -mkdir error out).
hadoop fs -mkdir -p /tmp/carbondata
# NOTE(review): 777 makes the directory world-writable — acceptable for a demo,
# tighten for production.
hadoop fs -chmod -R 777 /tmp/carbondata
4、准备sample.csv
cd ~
# Write sample.csv non-interactively (replaces the manual vim + copy/paste step).
# The quoted 'EOF' delimiter prevents any shell expansion inside the data.
cat > sample.csv <<'EOF'
id,name,city,age
1,david,shenzhen,31
2,eason,shenzhen,27
3,jarry,wuhan,35
EOF
# Upload sample.csv to HDFS so it can be loaded into CarbonData.
hadoop fs -put ./sample.csv /tmp