Credit Card Application Records
SQL Database Project
In this project we run mid-level (complexity) queries on a credit card dataset using SQL Server Management Studio.
Techniques used here include aggregating data using group by statements, partitioning, Common Table Expressions (CTE), formatting data, cleaning data, WITH clause, subsets, creating & retrieving views,...
There are two tables, which are connected by ID. The source data set can be found on Kaggle Here.
​
Note: Our dataset has more than 1,000,000 rows. To save space, tables below show only first few rows of results.
You can get the full results by running the complete code, available at the end of this page.
​
​
--Credit Cards Records and applications Data Exploration
--Skills Used: Joins, CTE's, Temp Tables, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types
Select *
From CreditCard..application_record$
Where CODE_GENDER='F'
Order By 6 Desc,7
​
​
​
​
​
​
​
​
​
​
​
​
​
-- Calculate the life percentage work which shows what percentage in their life, each employee has worked.
-- AS an example if a person is 40 years old and have been working for 20 years, it equals 50% lifepercentageworked
Select NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , OCCUPATION_TYPE , DAYS_EMPLOYED , DAYS_BIRTH ,
Round( (DAYS_EMPLOYED / DAYS_BIRTH) * 100,1) AS 'Life_%_Worked'
From CreditCard..application_record$
Where OCCUPATION_TYPE is not null
Order By 6 Desc
​
​
-- Here we want to Group rows that have the same values into Summary rows, like NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , NAME_housing_TYPE
-- and then aggregate data using Sume function to calculate Life_%_Worked_Per_Group
Select NAME_EDUCATION_TYPE
AS
Education_Level , NAME_FAMILY_STATUS AS Relationship_Status ,
NAME_housing_TYPE AS Residence,
Round( -avg(DAYS_EMPLOYED) / 365 ,1) AS Avg_Years_Employed , Round( -avg( DAYS_BIRTH) / 365 , 1) AS Avg_Age,
Round( Sum(DAYS_EMPLOYED) / Sum(DAYS_BIRTH) * 100,1) AS 'Life_%_Worked_Per_Group'
From CreditCard..application_record$
Where OCCUPATION_TYPE is not null
Group By NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , NAME_housing_TYPE
Order By 6 Desc
​
​
-- Find which Groups have the highest percentage of their life worked
-- there is no BOTTOM operator in SQL ; if needed you can simply sort (Order By) it in reverse
Select
Top 10
NAME_EDUCATION_TYPE AS Education_Level , NAME_FAMILY_STATUS AS Relationship_Status ,
NAME_housing_TYPE AS Residence,
Round( -avg(DAYS_EMPLOYED) / 365 ,1) AS Avg_Years_Employed , Round( -avg( DAYS_BIRTH) / 365 , 1) AS Avg_Age,
Round( Sum(DAYS_EMPLOYED) / Sum(DAYS_BIRTH) * 100,1) AS 'Life_%_Worked_Per_Group'
From CreditCard..application_record$
Where OCCUPATION_TYPE is not null
Group By NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , NAME_housing_TYPE
Order By 6 Desc
​
​
​
​
--Finding the Average_Years_employed For each occupation type
Select OCCUPATION_TYPE , Round(-avg(days_employed/365),1) AS Average_Years_employed
From CreditCard..application_record$
Where OCCUPATION_TYPE is not null
Group By OCCUPATION_TYPE
Order By 2 Desc
​
​
​
​
​
​
​
​
​
​
​
​
-- BREAKING THINGS DOWN By NAME_FAMILY_STATUS , NAME_INCOME_TYPE
-- Showing Family status and income type with the highest Average House income
-- Exclude subsets that are too small to avoid inaccurate insights
Select NAME_FAMILY_STATUS , NAME_INCOME_TYPE , Count(*) AS G_members_Count ,Round( avg(AMT_INCOME_TOTAL) / 1000 ,0 ) AS Avg_HoUse_Income_X_$1000
From CreditCard..application_record$
Group By NAME_FAMILY_STATUS , NAME_INCOME_TYPE
Having Count(*) >=10
Order By 4 Desc
​
​
​
​
​
​
​
​
-- GLOBAL NUMBERS
-- In original dataset 365243 days equals to 1000 years which cant be true for DAYS_EMPLOYED
-- Such data are excluded
Select
Sum(AMT_INCOME_TOTAL) AS Sum_Total_Income , Sum(CNT_FAM_MEMBERS) AS Sum_Total_Family_Memebrs,
Round(Sum(-DAYS_EMPLOYED / 365),0) AS Sum_Total_Years_Employed
From CreditCard..application_record$
Where DAYS_EMPLOYED != 365243
​
​
-- Joining the 2 sources tables with all columns included
Select *
From CreditCard..application_record$ AS App
Join CreditCard..credit_record$ AS Crd
on app.id=crd.id
​
​
-- We Use SQL Partition By to divide the result set into Partitions and perform computation On each subset of Partitioned data.
-- For each subset based On NAME_income_TYPE ,NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , NAME_HOUSING_TYPE we want to calculate rolling_months_balance
-- That represents sum total of MONTHS_BALANCE for that Group
Select
app.id, NAME_INCOME_TYPE,NAME_EDUCATION_TYPE, NAME_FAMILY_STATUS , NAME_HOUSING_TYPE , -MONTHS_BALANCE AS Month_Count,
-Sum(months_balance) Over (Partition By NAME_income_TYPE ,NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS ,
NAME_HOUSING_TYPE ) AS rolling_months_balance
From CreditCard..application_record$ AS App
Join CreditCard..credit_record$ AS Crd
on app.id=crd.id
Where DAYS_EMPLOYED != 365243 and NAME_INCOME_TYPE is not null
Order By 7,1,2
​
​
-- We create subquery named SubQ to get the NAME_INCOME_TYPE,NAME_EDUCATION_TYPE, NAME_FAMILY_STATUS , NAME_HOUSING_TYPE , Month_Count after Joining 2 tables
-- Using CTE and With Clause to perform Calculation On Partition By in previous query
-- the common table expression (CTE) is a temporary named result set that you can reference within a Select, INSERT, UPDATE, or DELETE statement.
with SubQ AS
(Select app.id, NAME_INCOME_TYPE,NAME_EDUCATION_TYPE, NAME_FAMILY_STATUS , NAME_HOUSING_TYPE , -MONTHS_BALANCE AS Month_Count,
-Sum(months_balance) Over (Partition By NAME_income_TYPE ,NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , NAME_HOUSING_TYPE ) AS rolling_months_balance
From CreditCard..application_record$ AS App
Join CreditCard..credit_record$ AS Crd
on app.id=crd.id
Where DAYS_EMPLOYED != 365243 and NAME_INCOME_TYPE is not null)
Select id, NAME_INCOME_TYPE,NAME_EDUCATION_TYPE, NAME_FAMILY_STATUS , NAME_HOUSING_TYPE , Month_Count
From subq
Where rolling_months_balance < 1000
Order By id
​
​
-- Create Temporary Table #Creditinfo to perform Calculation On Partition By in previous query
DROP Table if exists #Creditinfo
Create Table #Creditinfo
(
id numeric,
NAME_INCOME_TYPE nvarchar(255),
NAME_EDUCATION_TYPE nvarchar(255),
NAME_FAMILY_STATUS nvarchar(255),
NAME_HOUSING_TYPE nvarchar(255),
month_Count float,
rolling_months_balance float
)
Insert into #Creditinfo
Select app.id, NAME_INCOME_TYPE,NAME_EDUCATION_TYPE, NAME_FAMILY_STATUS , NAME_HOUSING_TYPE , -MONTHS_BALANCE AS Month_Count,
-Sum(months_balance) Over (Partition By NAME_income_TYPE ,NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , NAME_HOUSING_TYPE ) AS rolling_months_balance
From CreditCard..application_record$ AS App
Join CreditCard..credit_record$ AS Crd
on app.id=crd.id
Where DAYS_EMPLOYED != 365243 and NAME_INCOME_TYPE is not null
Select * , Round((rolling_months_balance / month_Count) , 0) AS RMPM_ratio
From #Creditinfo
Where month_Count!=0
​
​
​
​
​
​
​
​
​
​
​
-- Creating View to store data
-- This can be Used for later visualizations in tableau
-- Refresh the source database in the object explorer to view the results
Use CreditCard
Go
Create View CCView_V5 AS
Select app.id, NAME_INCOME_TYPE,NAME_EDUCATION_TYPE, NAME_FAMILY_STATUS , NAME_HOUSING_TYPE , -MONTHS_BALANCE AS Month_Count,
-Sum(months_balance) Over (Partition By NAME_income_TYPE ,NAME_EDUCATION_TYPE , NAME_FAMILY_STATUS , NAME_HOUSING_TYPE ) AS rolling_months_balance
From CreditCard..application_record$ AS App
Join CreditCard..credit_record$ AS Crd
on app.id=crd.id
Where DAYS_EMPLOYED != 365243 and NAME_INCOME_TYPE is not null
---------------------------------------------------------
Select *
From CCView_V5
​
​
​
The complete code has been posted to GitHub repository, Here.
It is run in Microsoft SQL Server Management Studio.
Some SQL Dialects, may have a slightly different syntax.