commit
915850c1b0
@ -0,0 +1 @@ |
|||||||
|
{"name":"sbt","version":"1.9.8","bspVersion":"2.1.0-M1","languages":["scala"],"argv":["C:\\Program Files\\Eclipse Adoptium\\jdk-8.0.392.8-hotspot\\jre/bin/java","-Xms100m","-Xmx100m","-classpath","C:/Users/11/AppData/Roaming/JetBrains/IntelliJIdea2021.3/plugins/Scala/launcher/sbt-launch.jar","xsbt.boot.Boot","-bsp","--sbt-launch-jar=C:/Users/11/AppData/Roaming/JetBrains/IntelliJIdea2021.3/plugins/Scala/launcher/sbt-launch.jar"]} |
||||||
@ -0,0 +1,13 @@ |
|||||||
|
ThisBuild / version := "0.1.0-SNAPSHOT"

// https://mvnrepository.com/artifact/org.apache.spark/spark-core
libraryDependencies += "org.apache.spark" %% "spark-core" % "3.5.0"

// https://mvnrepository.com/artifact/org.apache.spark/spark-sql
// NOTE(review): spark-sql is "provided" while spark-core is compile scope — inconsistent.
// If the job is launched via `sbt run` (it uses .master("local")), spark-sql classes will
// be missing at runtime; confirm the intended deployment (spark-submit vs local run).
libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.5.0" % "provided"

// https://mvnrepository.com/artifact/org.postgresql/postgresql
// JDBC driver used by spark.read.jdbc / DataFrameWriter.jdbc against PostgreSQL.
libraryDependencies += "org.postgresql" % "postgresql" % "42.7.1"

ThisBuild / scalaVersion := "2.13.12"

lazy val root = (project in file("."))
  .settings(
    name := "TECHDM_TRANSACTION"
  )
||||||
@ -0,0 +1 @@ |
|||||||
|
sbt.version = 1.9.8 |
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,165 @@ |
|||||||
|
package task |
||||||
|
|
||||||
|
import org.apache.spark.sql.functions._ |
||||||
|
import org.apache.spark.sql.types._ |
||||||
|
import org.apache.spark.sql._ |
||||||
|
|
||||||
|
import java.util.Properties |
||||||
|
|
||||||
|
object task1 extends App { |
||||||
|
|
||||||
|
/** Normalises an OFR symbol code to "NNNNN" or "NNNNN.NN".
  *
  * A 7-character code ending in "00" is truncated to its first five digits;
  * any other 7-character code is rendered as "NNNNN.NN". A 5-character value,
  * or one already containing a dot, is returned unchanged. When `value` is
  * null or has an unexpected shape, the code is derived from `account`
  * starting at position 13 instead.
  */
def formatOfr(value: String, account: String): String = {
  def render(raw: String): String = {
    val head = raw.substring(0, 5)
    if (raw.endsWith("00")) head
    else s"$head.${raw.substring(5, 7)}"
  }

  val normalised = Option(value).collect {
    case v if v.length == 7                    => render(v)
    case v if v.length == 5 || v.contains('.') => v
  }
  normalised.getOrElse(render(account.substring(13)))
}
||||||
|
|
||||||
|
// Spark UDF wrapper around formatOfr so it can be applied to DataFrame columns.
val formatOfrUDF = udf(formatOfr _)
||||||
|
|
||||||
|
/** Shifts a transaction date back one year when either leg touches a
  * 707* balance account; otherwise returns the date unchanged.
  *
  * `date` must be ISO-8601 (yyyy-MM-dd); the result is in the same format.
  */
def processDate(date: String, dtAccount: String, ktAccount: String): String = {
  val touches707 = Seq(dtAccount, ktAccount).exists(_.startsWith("707"))
  if (!touches707) date
  else java.time.LocalDate.parse(date).minusYears(1).toString
}
||||||
|
|
||||||
|
// Spark UDF wrapper around processDate for column-level use in select/withColumn.
val processDateUDF = udf(processDate _)
||||||
|
|
||||||
|
/** Builds the TECHDM transaction data mart for one data product and writes it
  * to the `techdm_transaction` table in PostgreSQL.
  *
  * @param dataProductInput data-product code (`dp_code`) to process
  * @param startDate        optional explicit lower bound for `input_dttm`;
  *                         honoured only when `endDate` is also supplied
  * @param endDate          optional explicit upper bound for `input_dttm`
  *
  * When no explicit window is given, the window is derived from the product's
  * `transaction_depth_update` setting, anchored on the lesser of the latest
  * paydoc load timestamp and the current timestamp.
  */
def makeTechdmTransaction(dataProductInput: String, startDate: Option[Column] = None, endDate: Option[Column] = None) = {
  val spark = SparkSession.builder.master("local").appName("Task1").getOrCreate()

  // Source dimensions and facts (local parquet snapshots).
  val acct = spark.read.parquet("src/main/resources/data/acct/part-00000-6883a1c2-b42b-437c-96d5-15762033328a-c000.snappy.parquet")
  val acctH = spark.read.parquet("src/main/resources/data/acct_h/part-00000-5ecc49e3-ce9c-4f36-a62f-0570f3c29630-c000.snappy.parquet")
  val crncy = spark.read.parquet("src/main/resources/data/crncy/part-00000-f9e00d5e-88f1-4f9e-b5f8-8077e9202152-c000.snappy.parquet")
  val div = spark.read.parquet("src/main/resources/data/div_/part-00000-711b71d0-68d4-40b5-b6c4-76050a546c2e-c000.snappy.parquet")
  val frsymb = spark.read.parquet("src/main/resources/data/frsymb/part-00000-96618fb3-d593-4e78-a1e1-2eb2ca8d1102-c000.snappy.parquet")
  val paydoc = spark.read.parquet("src/main/resources/data/paydoc/part-00000-179f9159-0a68-4b76-a8b3-9a205e9b4569-c000.snappy.parquet")
  val txn = spark.read.parquet("src/main/resources/data/txn/part-00000-02fff546-1c15-4c28-821e-59e0de92feda-c000.snappy.parquet")

  // SECURITY(review): credentials are hard-coded; move to config/env before production use.
  val pgConnectionProperties = new Properties()
  pgConnectionProperties.put("user", "postgres")
  pgConnectionProperties.put("password", "12345678")
  val dbString = "jdbc:postgresql://localhost:5432/task1"

  val dataProduct = spark.read.jdbc(dbString, "public.data_product", pgConnectionProperties)
  val techdmSettingTransactionFilter = spark.read.jdbc(dbString, "public.techdm_setting_transaction_filter", pgConnectionProperties)

  // Product settings; fail fast with a clear message instead of an index error
  // when the requested dp_code is absent.
  val productRows = dataProduct.filter(dataProduct("dp_code") === dataProductInput).collect()
  require(productRows.nonEmpty, s"Unknown data product code: $dataProductInput")
  val productRow = productRows(0)
  val asnuCodeFk = productRow.getAs[String]("asnu_code_fk")
  val transactionDepth = productRow.getAs[Int]("transaction_depth_update")

  def checkIfDatesAreSet(start_dt: Option[Column], end_dt: Option[Column]): Boolean = start_dt.isDefined && end_dt.isDefined

  // Latest load timestamp in paydoc. BUG FIX: the original wrapped this in lit(...)
  // first and then compared the Column to null — a reference comparison that is
  // always false, making the empty-source fallback unreachable. Inspect the raw
  // value from the Row instead (null when paydoc is empty / all-null), and also
  // avoid the bogus getAs[TimestampType] (a DataType, not a value type).
  val maxInputDttm = paydoc.select(max(paydoc("input_dttm"))).first().get(0)

  val currentDate = current_timestamp()

  val finalStartDate: Column =
    if (checkIfDatesAreSet(startDate, endDate)) startDate.get
    else if (maxInputDttm == null) date_sub(currentDate, transactionDepth)
    else least(date_sub(lit(maxInputDttm), transactionDepth), date_sub(currentDate, transactionDepth))

  val finalEndDate = if (checkIfDatesAreSet(startDate, endDate)) endDate.get else date_sub(currentDate, 1)

  val transactionFilters = techdmSettingTransactionFilter.filter(techdmSettingTransactionFilter("dp_code_fk") === dataProductInput)

  // Builds an OR-combined LIKE filter over account-number masks stored in the
  // given column ("m1, m2, ..."). BUG FIX: the original used reduce, which throws
  // UnsupportedOperationException when no masks are configured; fall back to a
  // pass-through filter instead.
  def maskFilter(maskColumn: String): Column =
    transactionFilters.select(col(maskColumn))
      .filter(col(maskColumn).isNotNull)
      .collect()
      .map(_.getString(0))
      .flatMap(_.split(",\\s"))
      .map(mask => col("num").like(mask))
      .reduceOption(_ or _)
      .getOrElse(lit(true))

  val dtFilter = maskFilter("dt_account_mask_list")
  val ktFilter = maskFilter("kt_account_mask_list")

  // Отбор данных: payment documents inside the date window for this product.
  val filteredPaydoc = paydoc.filter(finalStartDate <= paydoc("input_dttm") && paydoc("input_dttm") <= finalEndDate && paydoc("paydoc_dp_code") === dataProductInput)

  // Полупроводки по дт и кт: debit (cred_flag = false) and credit (cred_flag = true) legs.
  val dtPaydoc = filteredPaydoc.join(txn, filteredPaydoc("paydoc_sid") === txn("paydoc_sid") && filteredPaydoc("paydoc_dp_code") === txn("paydoc_dp_code") && txn("cred_flag") === false).drop(txn("input_dttm"))
  val ktPaydoc = filteredPaydoc.join(txn, filteredPaydoc("paydoc_sid") === txn("paydoc_sid") && filteredPaydoc("paydoc_dp_code") === txn("paydoc_dp_code") && txn("cred_flag") === true).drop(txn("input_dttm"))

  // Данные по счетам, restricted by the configured account masks.
  val dtAccount = dtPaydoc.join(acct, acct("acct_sk") === txn("acct_sk") && acct("acct_dp_code") === txn("acct_dp_code")).drop(acct("acct_sk"), txn("acct_dp_code")).filter(dtFilter)
  val ktAccount = ktPaydoc.join(acct, acct("acct_sk") === txn("acct_sk") && acct("acct_dp_code") === txn("acct_dp_code")).drop(acct("acct_sk"), txn("acct_dp_code")).filter(ktFilter)

  // Данные по валюте.
  val dtCur = dtAccount.join(crncy, crncy("crncy_sk") === dtAccount("crncy_sk") && crncy("crncy_dp_code") === dtAccount("crncy_dp_code"))
  val ktCur = ktAccount.join(crncy, crncy("crncy_sk") === ktAccount("crncy_sk") && crncy("crncy_dp_code") === ktAccount("crncy_dp_code"))

  // Филиал: historical account attributes valid at the transaction's load time.
  val dtFilial = dtCur.join(acctH, acctH("acct_sk") === dtCur("acct_sk") && acctH("acct_dp_code") === dtCur("acct_dp_code") && acctH("end_dt") >= dtCur("input_dttm"), "left")
  val ktFilial = ktCur.join(acctH, acctH("acct_sk") === ktCur("acct_sk") && acctH("acct_dp_code") === ktCur("acct_dp_code") && acctH("end_dt") >= ktCur("input_dttm"), "left")

  val dtFilialWithDiv = dtFilial.join(div, div("div_sk") === dtFilial("bal_div_sk") && div("div_dp_code") === dtFilial("bal_div_dp_code"), "left")
  val ktFilialWithDiv = ktFilial.join(div, div("div_sk") === ktFilial("bal_div_sk") && div("div_dp_code") === ktFilial("bal_div_dp_code"), "left")

  // Символы ОФР (financial result symbols).
  val dtFrSymb = dtFilialWithDiv.join(frsymb, frsymb("frsymb_sk") === dtFilialWithDiv("frsymb_sk") && frsymb("frsymb_dp_code") === dtFilialWithDiv("frsymb_dp_code"), "left")
  val ktFrSymb = ktFilialWithDiv.join(frsymb, frsymb("frsymb_sk") === ktFilialWithDiv("frsymb_sk") && frsymb("frsymb_dp_code") === ktFilialWithDiv("frsymb_dp_code"), "left")

  // Сборка дт и кт: final projection of the debit leg.
  val dtFull = dtFrSymb
    .withColumn("asnu_code_fk", lit(asnuCodeFk))
    .withColumn("dp_code_fk", lit(dataProductInput))
    .select(filteredPaydoc("paydoc_sid") as "transaction_ek",
      filteredPaydoc("input_dttm") as "transaction_date",
      to_date(filteredPaydoc("input_dttm")) as "input_dt",
      to_date(filteredPaydoc("bal_dttm")) as "value_date",
      filteredPaydoc("doc_num") as "mem_order_number",
      filteredPaydoc("doc_dt") as "mem_order_date",
      acct("sid") as "dt_account_fk",
      acct("acct_dp_code") as "dt_account_dp_code",
      acct("num") as "dt_account",
      crncy("sid") as "dt_currency_fk",
      crncy("crncy_dp_code") as "dt_currency_dp_code",
      crncy("iso_code") as "dt_currency_alfa_code",
      crncy("digital_code") as "dt_currency_digital_code",
      txn("sum_act_amt") as "dt_amount",
      txn("sum_src_bal_amt") as "amount_rub",
      div("sid") as "dt_filial_fk",
      div("div_dp_code") as "dt_filial_dp_code",
      div("code") as "dt_filial_code",
      formatOfrUDF(frsymb("num"), acct("num")) as "dt_ofr",
      filteredPaydoc("doc_purp_txt") as "payment_purpose",
      filteredPaydoc("ctl_validfrom") as "ctl_validfrom_ts"
    )

  // Final projection of the credit leg.
  val ktFull = ktFrSymb
    .withColumn("asnu_code_fk", lit(asnuCodeFk))
    .withColumn("dp_code_fk", lit(dataProductInput))
    .select(filteredPaydoc("paydoc_sid") as "transaction_ek",
      acct("sid") as "kt_account_fk",
      acct("acct_dp_code") as "kt_account_dp_code",
      acct("num") as "kt_account",
      crncy("sid") as "kt_currency_fk",
      crncy("crncy_dp_code") as "kt_currency_dp_code",
      crncy("iso_code") as "kt_currency_alfa_code",
      crncy("digital_code") as "kt_currency_digital_code",
      txn("sum_act_amt") as "kt_amount",
      div("sid") as "kt_filial_fk",
      div("div_dp_code") as "kt_filial_dp_code",
      div("code") as "kt_filial_code",
      formatOfrUDF(frsymb("num"), acct("num")) as "kt_ofr"
    )

  // Итоговый dataframe: debit and credit legs matched by transaction key.
  // (The alias inside to_date in the original was dead — withColumn names the column.)
  val techdmSrcTransaction = dtFull.join(ktFull, dtFull("transaction_ek") === ktFull("transaction_ek")).drop(ktFull("transaction_ek"))
    .withColumn("operation_date", to_date(processDateUDF(col("value_date"), col("dt_account"), col("kt_account"))))

  techdmSrcTransaction.show()
  //techdmSrcTransaction.write.mode("overwrite").save("src/main/resources/data/result")
  techdmSrcTransaction.write.mode("overwrite").jdbc(dbString, "techdm_transaction", pgConnectionProperties)
  spark.stop()
}
||||||
|
|
||||||
|
|
||||||
|
// Entry point: build the mart for product "ndl_eks_gl" over an explicit date window.
makeTechdmTransaction("ndl_eks_gl", Option(to_date(lit("2015-03-06"))), Option(to_date(lit("2024-03-06"))))
||||||
|
} |
||||||
Loading…
Reference in new issue