--This SQL script produces a table for the statistic of interest with the defined Get Data Out groups for Soft Tissue and Peripheral Nerves sarcomas.
--Run on CASREF01
-- cohort for Soft Tissue and Peripheral Nerves for GDO
-- C47, C48 excluding the bits that got included in ovary, C49
-- defined in ICD-10
-- Tumour cohort for the soft tissue and peripheral nerves group:
-- tumours diagnosed 2013-2017 (inclusive) with cascade_inci_flag = 1
-- whose ICD-10 site is C47, C49, or a qualifying C48 tumour.
with tumour_cohort_soft as
(
    select
        tumourid,
        site_icd10_o2_3char,
        diagnosisyear,
        age
    from av2017.at_tumour_england
    where cascade_inci_flag = 1
      and diagnosisyear between 2013 and 2017
      and (
            -- C47 and C49 are taken whole
            site_icd10_o2_3char in ('C47', 'C49')
            or (
                site_icd10_o2_3char = 'C48'
                and (
                    -- all male peritoneal cancers
                    sex = 1
                    -- remaining (female) peritoneal cancers: only the 'sarcomas'
                    -- and other morphologies excluded from ovary; between this
                    -- cohort and ovary ALL tumours at C48 should be counted
                    or morph_icd10_o2 in (
                        8693, 8800, 8801, 8802, 8803, 8804, 8805, 8806, 8810, 8963,
                        8990, 8991, 9040, 9041, 9042, 9043, 9044, 9490, 9500
                    )
                    or morph_icd10_o2 between 8811 and 8921
                    or morph_icd10_o2 between 9120 and 9373
                    or morph_icd10_o2 between 9530 and 9582
                )
            )
          )
)
,
-- Statistic of interest: every tumour in the cohort carries a constant 1,
-- so summing this column downstream counts tumours.
statisticofinterest_soft as
(
    select
        tc.tumourid,
        1 as statisticofinterest_soft
    from tumour_cohort_soft tc
),
-- Cohort rows with the statistic-of-interest value attached.
-- The left join keeps every cohort row even if no statistic row matches.
-- Columns are listed explicitly: SELECT * across this join exposed tumourid
-- twice (once from each side), which makes the CTE fail with an ambiguous
-- column error as soon as anything selects * or tumourid from it.
tumour_cohort_linked_soft as
(
    select
        tc_soft.tumourid,
        tc_soft.site_icd10_o2_3char,
        tc_soft.diagnosisyear,
        tc_soft.age,
        soi.statisticofinterest_soft
    from tumour_cohort_soft tc_soft
    left join statisticofinterest_soft soi
        on soi.tumourid = tc_soft.tumourid
)
-- Sums the statistic of interest per reporting period: the five single years
-- 2013-2017 and the three-year rolling groups 2013-2015, 2014-2016, 2015-2017.
-- "Year" is a text label (it is matched against the text year labels of the
-- population table later); diagnosisyear itself is compared with numeric
-- literals throughout, so no branch relies on implicit string-to-number
-- conversion.
-- Each branch is an aggregate without GROUP BY, so it always yields exactly
-- one row; the sum is NULL when no tumour falls in the period.
, extractpartition_soft as (
    select '2013' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear = 2013
    union all
    select '2014' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear = 2014
    union all
    select '2015' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear = 2015
    union all
    select '2016' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear = 2016
    union all
    select '2017' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear = 2017
    union all
    -- three-year rolling groups
    select '2013-2015' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear between 2013 and 2015
    union all
    select '2014-2016' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear between 2014 and 2016
    union all
    select '2015-2017' as "Year",
           sum(statisticofinterest_soft) as statistic_of_interest
    from tumour_cohort_linked_soft
    where diagnosisyear between 2015 and 2017
) ,
-- Maps each English LSOA (lsoa11 beginning with 'E') to a region name,
-- going through the CCG recorded against it in the postcode lookup.
-- The left join keeps LSOAs whose CCG has no match; their Region is NULL.
lsoatoregionlookup
as
(
    select distinct
        pc.lsoa11,
        nhser17nm as Region
    from nspl_201805 pc
    left join ANALYSISNCR.CCG18_NHSER17_18_LKP_DATALAKE nr
        on pc.ccg = nr.ccgapr18cd
    where substr(pc.lsoa11, 1, 1) = 'E'
)
-- Maps every English LSOA (lsoa11 beginning with 'E') to the pooled
-- region label 'All'.
, lsoatoalllookup
as
(
    select distinct
        pc.lsoa11,
        'All' as Region
    from nspl_201805 pc
    where substr(pc.lsoa11, 1, 1) = 'E'
)
-- Geography lookup: each LSOA appears once per region label it belongs to
-- (its own region and the pooled 'All').
, geoglookup as
(
    select lsoa11, region
    from lsoatoregionlookup
    union
    select lsoa11, region
    from lsoatoalllookup
)
/*
reminding myself how dual works:
select rownum from dual;
select rownum from dual
-- LAST YEAR NEEDED
connect by level <= 3;
*/
-- Sex lookup: each base sex code maps to its own text label and to the
-- pooled label 'All'. The four rows are distinct by construction, so they
-- are simply concatenated.
,sexlookup
as
(
    select 1 as basesex, '1' as sex from dual
    union all
    select 1 as basesex, 'All' as sex from dual
    union all
    select 2 as basesex, '2' as sex from dual
    union all
    select 2 as basesex, 'All' as sex from dual
)
-- Year lookup.
-- Maps each calendar year (baseyear) to every reporting label (year) it
-- contributes to: its own single-year label for 2013-2017, plus the
-- three-year rolling groups 2013-2015, 2014-2016 and 2015-2017.
-- Only the years actually needed are generated (rather than counting up
-- from 1 and filtering), and the rolling groups are derived from their
-- start year, so each bound is written once.
,yearlookup
as
(
    -- single years
    select yr.baseyear
         , to_char(yr.baseyear) as year
    from
    ( -- FIRST YEAR NEEDED is 2013 (= 2012 + 1)
      select 2012 + level as baseyear from dual
      -- LAST YEAR NEEDED is 2017 (5 years in total)
      connect by level <= 5
    ) yr
    UNION ALL
    -- three-year rolling groups: the group starting in year s is labelled
    -- 's-(s+2)' and covers baseyears s, s+1 and s+2
    select win.startyear + pos.yearoffset as baseyear
         , to_char(win.startyear) || '-' || to_char(win.startyear + 2) as year
    from
    ( -- first group starts in 2013, last group starts in 2015 (3 groups)
      select 2012 + level as startyear from dual
      connect by level <= 3
    ) win
    cross join
    ( -- positions 0, 1, 2 within a three-year group
      select level - 1 as yearoffset from dual
      connect by level <= 3
    ) pos
)
-- Making age lookup.
--select * from analysischarlieturner.ages;
-- We don't have a systematic 'we definitely used this age partition'
-- So we should assume any age partition in the age table could be being used.
-- There are currently 30 age partitions.
-- This is a bit tedious.
-- You can at least get a list of them:
/*
select COLUMN_NAME from ALL_TAB_COLUMNS
where owner = 'ANALYSISCHARLIETURNER'
and TABLE_NAME = 'AGES'
order by COLUMN_NAME;
*/
-- If I had grip, I would write some R to automate creating this table
-- Instead I did it in Excel
-- Age lookup: pairs each five-year age band number (baseage = floor(age/5)+1)
-- with every age label it can fall under: the pooled label 'All' plus the
-- label from each age-partition column of the ages table.
-- UNION removes duplicate (baseage, label) pairs across and within branches.
-- Output column names come from the first branch.
, agelookup
as
(
select floor(a.age / 5) + 1 as baseage, 'All' as age from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_13_25_40_60_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_20_30_50_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_20_40 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_20_50_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_20_50_70_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_20_TO_40 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_20_TO_90 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_25 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_25_40_60 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_25_40_60_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_25_40_TO_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_25_50 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_30 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_30_50 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_30_50_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_30_50_70_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_30_TO_50 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_30_TO_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_35 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_40 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_40_TO_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_5_THEN_10 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_50 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_50_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_50_70_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_50_TO_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_50_TO_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_5_10_20_30_50_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_60 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_60_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_60_70_75_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_60_TO_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_70 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_70_75_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_70_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.SPLIT_80 from analysischarlieturner.ages a
union
select floor(a.age / 5) + 1, a.TENYEARAGE from analysischarlieturner.ages a)
-- So, I have a region lookup, a sex lookup, a year lookup, and an age lookup.
-- And some populations! Yay. Now just join them all together.
-- We have no joins for the Welsh LSOAs and for the years before 2013.
-- But that is what we wanted
-- All ages and sexes have joined to something.
-- Population counts summed for every combination of region, sex, year and
-- age label. Each population row fans out to every label it belongs to via
-- the four lookups; rows with no match in a lookup are kept by the left
-- joins and fall into a group with a NULL label.
, population_table as
(
    select
        g.region,
        s.sex,
        y.year,
        a.age,
        sum(pops.popcount) as popcount
    from ons2017.populations_normalised pops
    left join geoglookup g
        on g.lsoa11 = pops.lsoa11
    left join sexlookup s
        on s.basesex = pops.sex
    left join yearlookup y
        on y.baseyear = pops.year
    left join agelookup a
        on a.baseage = pops.quinaryagegroupint
    group by
        g.region,
        s.sex,
        y.year,
        a.age
)
,
-- Output rows for soft tissue and peripheral nerves: one row per reporting
-- period, with the statistic of interest and the matching all-England,
-- all-ages, all-sexes population. Every other breakdown column is a fixed
-- 'not split' label.
-- All population filters sit in the ON clause. Filtering the outer-joined
-- table in WHERE would discard the NULL-extended rows and silently turn the
-- left join into an inner join; here a period with no matching population
-- row is kept, with a NULL "Population".
soft as(
SELECT
'Soft tissue and peripheral nerves' AS "Cancer Site",
ep."Year" AS "Year",
'All' AS "Tumour Type",
'All' AS "Tumour Type 2",
'All' AS "Tumour Type 3",
'All' AS "Basis of Diagnosis",
'All stages' AS "Stage",
'Not split by stage detail' AS "Stage detail",
'All ages' AS "Age",
'All England' AS "Region",
'Persons' AS "Sex",
'Not split by grade' AS "Grade",
'Not split by nodal status' AS "Nodal Status",
ep.statistic_of_interest as "statistic_of_interest",
pops.popcount as "Population"
from extractpartition_soft ep
left join population_table pops
on ep."Year" = pops.year
and pops.age = 'All'
and pops.region = 'All'
and pops.sex = 'All'
)
-- Final output: every column of the soft CTE, in its defined order.
select
    "Cancer Site",
    "Year",
    "Tumour Type",
    "Tumour Type 2",
    "Tumour Type 3",
    "Basis of Diagnosis",
    "Stage",
    "Stage detail",
    "Age",
    "Region",
    "Sex",
    "Grade",
    "Nodal Status",
    "statistic_of_interest",
    "Population"
from soft;