/*
 * Copyright (C) 2015 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.crossdata.connector.elasticsearch

import java.util.{GregorianCalendar, UUID}

import com.sksamuel.elastic4s.ElasticDsl._
import com.sksamuel.elastic4s.mappings.FieldType._
import com.sksamuel.elastic4s.mappings.{MappingDefinition, TypedFieldDefinition}
import com.stratio.common.utils.components.logger.impl.SparkLoggerComponent
import com.typesafe.config.ConfigFactory
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.crossdata.test.SharedXDContextTypesTest
import org.apache.spark.sql.crossdata.test.SharedXDContextTypesTest.SparkSQLColDef
import org.joda.time.DateTime

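/**
 * Type-mapping test support for the Elasticsearch connector: pairs the shared Crossdata
 * type-test column definitions with their elastic4s field mappings and sample values, and
 * takes care of indexing the resulting test document into Elasticsearch.
 */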
trait ElasticDataTypes extends ElasticWithSharedContext
  with SharedXDContextTypesTest
  with ElasticSearchDataTypesDefaultConstants
  with SparkLoggerComponent {

  override val dataTypesSparkOptions = Map(
    "resource" -> s"$Index/$Type",
    "es.nodes" -> s"$ElasticHost",
    "es.port" -> s"$ElasticRestPort",
    "es.nativePort" -> s"$ElasticNativePort",
    "es.cluster" -> s"$ElasticClusterName",
    "es.nodes.wan.only" -> "true",
    "es.read.field.as.array.include" -> Seq(
      "arrayint"
    ).mkString(",")
  )

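  /**
   * Column fixture: an optional elastic4s field definition plus a generator for the value to
   * index in that field. `None` means no explicit mapping is declared for the field, so
   * Elasticsearch maps it dynamically.
   */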
  protected case class ESColumnData(elasticType: Option[TypedFieldDefinition], data: () => Any)

  protected object ESColumnData {
    def apply(data: () => Any): ESColumnData = ESColumnData(None, data)
    def apply(elasticType: TypedFieldDefinition, data: () => Any): ESColumnData = ESColumnData(Some(elasticType), data)
  }

  override val arrayFlattenTestColumn: String = "arraystruct"

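  /**
   * One entry per tested data type: the Spark SQL column definition (name, DDL type and result
   * assertion) paired with the Elasticsearch mapping and the value generator used to build the
   * indexed test document.
   */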
  protected val dataTest: Seq[(SparkSQLColDef, ESColumnData)] = Seq(
    (SparkSQLColDef("id", "INT", _ shouldBe a[java.lang.Integer]), ESColumnData("id" typed IntegerType, () => 1)),
    (SparkSQLColDef("age", "LONG", _ shouldBe a[java.lang.Long]), ESColumnData("age" typed LongType, () => 1)),
    (
      SparkSQLColDef("description", "STRING", _ shouldBe a[java.lang.String]),
      ESColumnData("description" typed StringType, () => "1")
    ),
    (
      SparkSQLColDef("name", "STRING", _ shouldBe a[java.lang.String]),
      ESColumnData("name" typed StringType index NotAnalyzed, () => "1")
    ),
    (
      SparkSQLColDef("enrolled", "BOOLEAN", _ shouldBe a[java.lang.Boolean]),
      ESColumnData("enrolled" typed BooleanType, () => false)
    ),
    (
      SparkSQLColDef("birthday", "DATE", _ shouldBe a[java.sql.Date]),
      ESColumnData("birthday" typed DateType, () => DateTime.parse(1980 + "-01-01T10:00:00-00:00").toDate)
    ),
    (
      SparkSQLColDef("salary", "DOUBLE", _ shouldBe a[java.lang.Double]),
      ESColumnData("salary" typed DoubleType, () => 0.15)
    ),
    (
      SparkSQLColDef("timecol", "TIMESTAMP", _ shouldBe a[java.sql.Timestamp]),
      ESColumnData(
        "timecol" typed DateType,
        () => new java.sql.Timestamp(new GregorianCalendar(1970, 0, 1, 0, 0, 0).getTimeInMillis)
      )
    ),
    (
      SparkSQLColDef("float", "FLOAT", _ shouldBe a[java.lang.Float]),
      ESColumnData("float" typed FloatType, () => 0.15)
    ),
    (
      SparkSQLColDef("binary", "BINARY", x => x.isInstanceOf[Array[Byte]] shouldBe true),
      ESColumnData("binary" typed BinaryType, () => "YWE=".getBytes)
    ),
    (
      SparkSQLColDef("tinyint", "TINYINT", _ shouldBe a[java.lang.Byte]),
      ESColumnData("tinyint" typed ByteType, () => Byte.MinValue)
    ),
    (
      SparkSQLColDef("smallint", "SMALLINT", _ shouldBe a[java.lang.Short]),
      ESColumnData("smallint" typed ShortType, () => Short.MaxValue)
    ),
    (
      SparkSQLColDef("subdocument", "STRUCT<field1: INT>", _ shouldBe a[Row]),
      ESColumnData("subdocument" inner ("field1" typed IntegerType), () => Map("field1" -> 15))
    ),
    (
      SparkSQLColDef(
        "structofstruct",
        "STRUCT<field1: INT, struct1: STRUCT<structField1: INT>>",
        { res =>
          res shouldBe a[GenericRowWithSchema]
          res.asInstanceOf[GenericRowWithSchema].get(1) shouldBe a[GenericRowWithSchema]
        }
      ),
      ESColumnData(
        "structofstruct" inner ("field1" typed IntegerType, "struct1" inner ("structField1" typed IntegerType)),
        () => Map("field1" -> 15, "struct1" -> Map("structField1" -> 42))
      )
    ),
    (
      SparkSQLColDef("arrayint", "ARRAY<INT>", _ shouldBe a[Seq[_]]),
      ESColumnData(() => Seq(1, 2, 3, 4))
    ),
    (
      SparkSQLColDef("arraystruct", "ARRAY<STRUCT<field1: LONG, field2: LONG>>", _ shouldBe a[Seq[_]]),
      ESColumnData(
        "arraystruct" nested (
          "field1" typed LongType,
          "field2" typed LongType
        ),
        () =>
          Array(
            Map(
              "field1" -> 11,
              "field2" -> 12
            ),
            Map(
              "field1" -> 21,
              "field2" -> 22
            ),
            Map(
              "field1" -> 31,
              "field2" -> 32
            )
          )
      )
    )/*,
    (
      SparkSQLColDef(
        "arraystructarraystruct",
        "ARRAY<STRUCT<stringfield: STRING, arrayfield: ARRAY<STRUCT<field1: INT, field2: INT>>>>",
        { res =>
          res shouldBe a[Seq[_]]
          res.asInstanceOf[Seq[_]].head shouldBe a[Row]
          res.asInstanceOf[Seq[_]].head.asInstanceOf[Row].get(1) shouldBe a[Seq[_]]
          res.asInstanceOf[Seq[_]].head.asInstanceOf[Row].get(1).asInstanceOf[Seq[_]].head shouldBe a[Row]
        }
      ),
      ESColumnData(
        "arraystructarraystruct" nested (
          "stringfield" typed StringType,
          "arrayfield" nested (
            "field1" typed IntegerType,
            "field2" typed IntegerType
          )
        ),
        () => Array(
          Map(
            "stringfield" -> "hello",
            "arrayfield" -> Array(
              Map(
                "field1" -> 10,
                "field2" -> 20
              )
            )
          )
        )
      )
    )*/
  )

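  // Column definitions exposed to the shared types test suite.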
  override protected def typesSet: Seq[SparkSQLColDef] = dataTest.map(_._1)

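  // Overrides the shared bootstrap so that only the types document is persisted,
  // failing fast when the insertion does not succeed.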
  abstract override def saveTestData: Unit = {
    require(saveTypesData > 0, emptyTypesSetError)
  }

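  // Indexes a single document with one field per tested type and flushes the index so the data
  // is immediately searchable; returns 1, the number of documents inserted.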
  override def saveTypesData: Int = {
    client.get.execute {
      val fieldsData = dataTest map {
        case (SparkSQLColDef(fieldName, _, _), ESColumnData(_, data)) => (fieldName, data())
      }
      index into Index / Type fields (fieldsData: _*)
    }.await
    client.get.execute {
      flush index Index
    }.await
    1
  }

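  // Builds the explicit mapping for the test type from every field that declares an elastic4s definition.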
  override def typeMapping(): MappingDefinition =
    Type fields (
      dataTest collect {
        case (_, ESColumnData(Some(mapping), _)) => mapping
      }: _*
    )

  override val emptyTypesSetError: String = "Couldn't insert Elasticsearch types test data"

}

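/**
 * Generates unique index and type names for each test run by appending a random UUID
 * (dashes stripped), so repeated runs do not collide on the same Elasticsearch index.
 */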
trait ElasticSearchDataTypesDefaultConstants extends ElasticSearchDefaultConstants {
  private lazy val config = ConfigFactory.load()
  override val Index = s"idxname${UUID.randomUUID.toString.replaceAll("-", "")}"
  override val Type = s"typename${UUID.randomUUID.toString.replaceAll("-", "")}"

}