Skip to content
This repository was archived by the owner on Sep 2, 2024. It is now read-only.

Commit 9af1ce7

Browse files
add actual fields from new file as comments to structure class
1 parent c0a4308 commit 9af1ce7

File tree

2 files changed

+20
-11
lines changed

2 files changed

+20
-11
lines changed

batch/src/main/scala/uk/gov/ons/addressindex/models/CSVSchemas.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ object CSVSchemas {
99

1010
/**
1111
* Postcode address CSV file schema
12+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,UPRN,UDPRN,ORGANISATION_NAME,DEPARTMENT_NAME,SUB_BUILDING_NAME,BUILDING_NAME,BUILDING_NUMBER,DEPENDENT_THOROUGHFARE,THOROUGHFARE,DOUBLE_DEPENDENT_LOCALITY,DEPENDENT_LOCALITY,POST_TOWN,POSTCODE,POSTCODE_TYPE,DELIVERY_POINT_SUFFIX,WELSH_DEPENDENT_THOROUGHFARE,WELSH_THOROUGHFARE,WELSH_DOUBLE_DEPENDENT_LOCALITY,WELSH_DEPENDENT_LOCALITY,WELSH_POST_TOWN,PO_BOX_NUMBER,PROCESS_DATE,START_DATE,END_DATE,LAST_UPDATE_DATE,ENTRY_DATE,EPOCH
1213
*/
1314
val postcodeAddressFileSchema = StructType(Seq(
1415
StructField("recordIdentifier", ByteType, nullable = false),
@@ -44,6 +45,7 @@ object CSVSchemas {
4445

4546
/**
4647
* BLPU CSV file schema
48+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,UPRN,LOGICAL_STATUS,BLPU_STATE,BLPU_STATE_DATE,PARENT_UPRN,X_COORDINATE,Y_COORDINATE,LATITUDE,LONGITUDE,RPC,LOCAL_CUSTODIAN_CODE,COUNTRY,START_DATE,END_DATE,LAST_UPDATE_DATE,ENTRY_DATE,ADDRESSBASE_POSTAL,POSTCODE_LOCATOR,MULTI_OCC_COUNT,EPOCH
4749
*/
4850
val blpuFileSchema = StructType(Seq(
4951
StructField("recordIdentifier", ByteType, nullable = false),
@@ -72,6 +74,7 @@ object CSVSchemas {
7274

7375
/**
7476
* Classification CSV file schema
77+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,UPRN,CLASS_KEY,CLASSIFICATION_CODE,CLASS_SCHEME,SCHEME_VERSION,START_DATE,END_DATE,LAST_UPDATE_DATE,ENTRY_DATE,EPOCH
7578
*/
7679
val classificationFileSchema = StructType(Seq(
7780
StructField("recordIdentifier", ByteType, nullable = false),
@@ -90,6 +93,7 @@ object CSVSchemas {
9093

9194
/**
9295
* crossref CSV file schema
96+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,UPRN,XREF_KEY,CROSS_REFERENCE,VERSION,SOURCE,START_DATE,END_DATE,LAST_UPDATE_DATE,ENTRY_DATE,EPOCH
9397
*/
9498
val crossrefFileSchema = StructType(Seq(
9599
StructField("recordIdentifier", ByteType, nullable = false),
@@ -108,6 +112,7 @@ object CSVSchemas {
108112

109113
/**
110114
* lpi CSV file schema
115+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,UPRN,LPI_KEY,LANGUAGE,LOGICAL_STATUS,START_DATE,END_DATE,LAST_UPDATE_DATE,ENTRY_DATE,SAO_START_NUMBER,SAO_START_SUFFIX,SAO_END_NUMBER,SAO_END_SUFFIX,SAO_TEXT,PAO_START_NUMBER,PAO_START_SUFFIX,PAO_END_NUMBER,PAO_END_SUFFIX,PAO_TEXT,USRN,USRN_MATCH_INDICATOR,AREA_NAME,LEVEL,OFFICIAL_FLAG,EPOCH
111116
*/
112117
val lpiFileSchema = StructType(Seq(
113118
StructField("recordIdentifier", ByteType, nullable = false),
@@ -140,6 +145,7 @@ object CSVSchemas {
140145

141146
/**
142147
* organisation CSV file schema
148+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,UPRN,ORG_KEY,ORGANISATION,LEGAL_NAME,START_DATE,END_DATE,LAST_UPDATE_DATE,ENTRY_DATE,EPOCH
143149
*/
144150
val organisationFileSchema = StructType(Seq(
145151
StructField("recordIdentifier", ByteType, nullable = false),
@@ -157,6 +163,7 @@ object CSVSchemas {
157163

158164
/**
159165
* street CSV file schema
166+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,USRN,RECORD_TYPE,SWA_ORG_REF_NAMING,STATE,STATE_DATE,STREET_SURFACE,STREET_CLASSIFICATION,VERSION,STREET_START_DATE,STREET_END_DATE,LAST_UPDATE_DATE,RECORD_ENTRY_DATE,STREET_START_X,STREET_START_Y,STREET_START_LAT,STREET_START_LONG,STREET_END_X,STREET_END_Y,STREET_END_LAT,STREET_END_LONG,STREET_TOLERANCE,EPOCH
160167
*/
161168
val streetFileSchema = StructType(Seq(
162169
StructField("recordIdentifier", ByteType, nullable = false),
@@ -187,6 +194,7 @@ object CSVSchemas {
187194

188195
/**
189196
* street-descriptor CSV file schema
197+
* RECORD_IDENTIFIER,CHANGE_TYPE,PRO_ORDER,USRN,STREET_DESCRIPTOR,LOCALITY,TOWN_NAME,ADMINSTRATIVE_AREA,LANGUAGE,START_DATE,END_DATE,LAST_UPDATE_DATE,ENTRY_DATE,EPOCH
190198
*/
191199
val streetDescriptorFileSchema = StructType(Seq(
192200
StructField("recordIdentifier", ByteType, nullable = false),
@@ -222,6 +230,7 @@ object CSVSchemas {
222230

223231
/**
224232
* hierarchy CSV file schema
233+
* UPRN,PRIMARY_UPRN,SECONDARY_UPRN,LAYERS,THIS_LAYER,PARENT_UPRN,ADDRESS_TYPE,ESTAB_TYPE,EPOCH
225234
*/
226235
val hierarchyFileSchema = StructType(Seq(
227236
StructField("uprn", LongType, nullable = false),

batch/src/main/scala/uk/gov/ons/addressindex/utils/SqlHelper.scala

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,9 @@ object SqlHelper {
194194
*/
195195
def aggregateHybridIndex(paf: DataFrame, nag: DataFrame, historical: Boolean = true): RDD[HybridAddressEsDocument] = {
196196

197-
// val rdmfGrouped = aggregateRDMFInformation(AddressIndexFileReader.readRDMFCSV())
198-
// .groupBy("uprn")
199-
// .agg(functions.collect_list(functions.struct("address_entry_id","address_entry_id_alphanumeric_backup")).as("entryids"))
197+
val rdmfGrouped = aggregateRDMFInformation(AddressIndexFileReader.readRDMFCSV())
198+
.groupBy("uprn")
199+
.agg(functions.collect_list(functions.struct("address_entry_id","address_entry_id_alphanumeric_backup")).as("entryids"))
200200

201201
val crossRefGrouped = aggregateCrossRefInformation(AddressIndexFileReader.readCrossrefCSV())
202202
.groupBy("uprn")
@@ -227,7 +227,7 @@ object SqlHelper {
227227
.join(crossRefGrouped, Seq("uprn"), "left_outer")
228228
.join(hierarchyJoinedWithRelatives, Seq("uprn"), "left_outer")
229229
.join(classificationsGrouped, Seq("uprn"), "left_outer")
230-
// .join(rdmfGrouped, Seq("uprn"), "left_outer")
230+
.join(rdmfGrouped, Seq("uprn"), "left_outer")
231231

232232
pafNagCrossHierGrouped.rdd.map {
233233
row =>
@@ -301,14 +301,14 @@ object SqlHelper {
301301
val mixedPartialTokens = mixedPartial.flatMap(_.toString.split(",").filter(_.nonEmpty)).distinct.mkString(",")
302302
val mixedPartialTokensExtraDedup = mixedPartialTokens.replaceAll(","," ").split(" ").distinct.mkString(" ").replaceAll(" "," ")
303303

304-
// val entryIds = Option(row.getAs[Seq[Row]]("entryids")).getOrElse(Seq())
305-
// val addressEntryId: Option[Long] = entryIds.map(row => row.getAs[Long]("address_entry_id")).headOption
306-
// // field with incorrect name retained temporarily for compatibility
307-
// val onsAddressId = addressEntryId
308-
// val addressEntryIdAlphanumericBackup: Option[String] = entryIds.map(row => row.getAs[String]("address_entry_id_alphanumeric_backup")).headOption
304+
val entryIds = Option(row.getAs[Seq[Row]]("entryids")).getOrElse(Seq())
305+
val addressEntryId: Option[Long] = entryIds.map(row => row.getAs[Long]("address_entry_id")).headOption
306+
// field with incorrect name retained temporarily for compatibility
307+
// val onsAddressId = addressEntryId
308+
val addressEntryIdAlphanumericBackup: Option[String] = entryIds.map(row => row.getAs[String]("address_entry_id_alphanumeric_backup")).headOption
309309

310-
val addressEntryId: Option[Long] = Some(1L)
311-
val addressEntryIdAlphanumericBackup: Option[String] = Some("1")
310+
// val addressEntryId: Option[Long] = Some(1L)
311+
// val addressEntryIdAlphanumericBackup: Option[String] = Some("1")
312312

313313
HybridAddressEsDocument(
314314
uprn,

0 commit comments

Comments
 (0)