BigQuery Python - Case Study - Patient Timeline - Scatter plot
#
import os, sys
from google.cloud import bigquery
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
# Connect to client
client = bigquery.Client()
# Query
query = """ WITH temp AS (
SELECT ssdl.*,
# The function sandbox.codeProcess returns processed CODE1, CODE2 and CODE3 along with vocabulary_id
sandbox.codeProcess(TO_JSON_STRING(ssdl), 'CODE1_CODE2', 'MORPO_BEH_TOPO', 'REIMB', 'ATC', 5, 5, 5, 7, 5).*
FROM `finngen-production-library.sandbox_tools_r10.finngen_r10_service_sector_detailed_longitudinal_v1` AS ssdl
# Here we are randomly selecting a FinnGen patient FINNGENID. You can replace this with a particular FINNGENID
WHERE FINNGENID = (
SELECT DISTINCT FINNGENID
FROM `finngen-production-library.sandbox_tools_r10.finngen_r10_service_sector_detailed_longitudinal_v1`
WHERE RAND() < 10/(SELECT COUNT(*) FROM `finngen-production-library.sandbox_tools_r10.finngen_r10_service_sector_detailed_longitudinal_v1`)
LIMIT 1
)
# The results of patient are ordered by the date
ORDER BY ssdl.APPROX_EVENT_DAY, ssdl.SOURCE
)
# The processed codes are mapped to English names
SELECT t.*,fgc.concept_class_id,fgc.name_en,fgc.name_fi
FROM temp AS t
LEFT JOIN `finngen-production-library.medical_codes.fg_codes_info_v1` as fgc
ON t.vocabulary_id = fgc.vocabulary_id AND
t.FG_CODE1 IS NOT DISTINCT FROM fgc.FG_CODE1 AND
t.FG_CODE2 IS NOT DISTINCT FROM fgc.FG_CODE2 AND
t.FG_CODE3 IS NOT DISTINCT FROM fgc.FG_CODE3
"""
# Job configuration
job_config = bigquery.QueryJobConfig()
# Run the queries
query_result = client.query(query,job_config=job_config)PreviousBigQuery Python - Case Study - Comorbidity - Upset plotNextSandbox internal API for software developers
Last updated
Was this helpful?