From 46ff35bc87a349f64d97f2b4b66c40445afb5701 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Mon, 17 Feb 2025 21:05:00 -0800 Subject: [PATCH 01/26] beginning to add new company object --- integration_tests/dbt_project.yml | 1 + .../get_hubspot_deal_company_columns.sql | 11 +++ .../int_rag_hubspot__company_document.sql | 73 +++++++++++++++++++ .../hubspot_staging/src_rag_hubspot.yml | 13 ++++ .../stg_rag_hubspot__deal_company.sql | 46 ++++++++++++ 5 files changed, 144 insertions(+) create mode 100644 macros/staging/hubspot/get_hubspot_deal_company_columns.sql create mode 100644 models/intermediate/hubspot/int_rag_hubspot__company_document.sql create mode 100644 models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index fef72d6..5ffc2cd 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -21,6 +21,7 @@ vars: rag_hubspot_engagement_company_identifier: "hubspot_engagement_company" rag_hubspot_engagement_contact_identifier: "hubspot_engagement_contact" rag_hubspot_engagement_deal_identifier: "hubspot_engagement_deal" + rag_hubspot_engagement_deal_company: "hubspot_deal_company" rag_hubspot_company_identifier: "hubspot_company" rag_hubspot_contact_identifier: "hubspot_contact" rag_hubspot_owner_identifier: "hubspot_owner" diff --git a/macros/staging/hubspot/get_hubspot_deal_company_columns.sql b/macros/staging/hubspot/get_hubspot_deal_company_columns.sql new file mode 100644 index 0000000..aa8199c --- /dev/null +++ b/macros/staging/hubspot/get_hubspot_deal_company_columns.sql @@ -0,0 +1,11 @@ +{% macro get_hubspot_deal_company_columns() %} + +{% set columns = [ + {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, + {"name": "deal_id", "datatype": dbt.type_int()}, + {"name": "company_id", "datatype": dbt.type_int()} +] %} + +{{ return(columns) }} + +{% endmacro %} \ No newline at end of file diff --git 
a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql new file mode 100644 index 0000000..5ea18e2 --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -0,0 +1,73 @@ +WITH owners AS ( + SELECT + *, + COALESCE( + email, + 'UNKNOWN' + ) AS safe_email, + COALESCE( + first_name, + '' + ) AS safe_first_name, + COALESCE( + last_name, + '' + ) AS safe_last_name + FROM + {{ ref('stg_rag_hubspot__owner') }} +), +deals AS ( + SELECT + *, + COALESCE({{ cast('property_closedate', dbt.type_string()) }}, 'not closed yet') AS safe_close_date + FROM + {{ ref('stg_rag_hubspot__deal') }} +), +company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__company') }} +), +deal_company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__deal_company') }} +), +deal_descriptions AS ( + SELECT + DISTINCT deal.deal_id, + {{ dbt.concat([ "'- {'", "'deal_name: '", "deals.property_dealname", "', '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "', '", "'deal_owner_email: '", "owners.safe_email", "', '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, + deal.property_closedate + FROM + deals + JOIN owners + ON owners.owner_id = deal.owner_id +), +company_with_deal_description AS ( + SELECT + id, + {{ dbt.listagg( + measure = "dd.deal_description", + delimiter_text = "'\\n'", + order_by_clause = "order by dd.property_closedate" + ) }} AS deal_descriptions + FROM + company + JOIN deal_company dc + ON dc.company_id = company.id + JOIN deal_descriptions dd + ON dd.deal_id = dc.deal_id + GROUP BY + 1 +) +SELECT + cdd.deal_descriptions, + company.* +FROM + company + JOIN company_with_deal_description cdd + ON cdd.id = company.id +WHERE + NOT company._fivetran_deleted diff --git a/models/staging/hubspot_staging/src_rag_hubspot.yml b/models/staging/hubspot_staging/src_rag_hubspot.yml index 72a42bf..9d00152 100644 --- 
a/models/staging/hubspot_staging/src_rag_hubspot.yml +++ b/models/staging/hubspot_staging/src_rag_hubspot.yml @@ -269,3 +269,16 @@ sources: description: The type of owner. - name: updated_at description: Timestamp representing when the owner was last updated. + + - name: deal_company + identifier: "{{ var('rag_hubspot_deal_company_identifier', 'deal_company')}}" + description: Each record represents a 'link' between a deal and a company. + config: + enabled: "{{ var('rag_hubspot_sales_enabled', true) and var('rag_hubspot_company_enabled', true) and var('rag_hubspot_deal_enabled', true) }}" + columns: + - name: _fivetran_synced + description: '{{ doc("_fivetran_synced") }}' + - name: deal_id + description: The ID of the related contact. + - name: company + description: The ID of the related company. \ No newline at end of file diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql new file mode 100644 index 0000000..4ec431e --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql @@ -0,0 +1,46 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='deal_company', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_deal_company', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + {{ + fivetran_utils.fill_staging_columns( + source_columns=adapter.get_columns_in_relation(source('rag_hubspot','deal_company')), + staging_columns=get_hubspot_deal_company_columns() + ) + }} + + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +), + +final as ( + + 
select + deal_id, + company_id, + source_relation + from fields +) + +select * +from final From 0ae873d681559ea6d371e1d231019db382fa2e95 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Tue, 18 Feb 2025 09:26:12 -0800 Subject: [PATCH 02/26] added working company document --- dbt_project.yml | 2 +- .../hubspot/get_hubspot_company_columns.sql | 3 +- .../int_rag_hubspot__company_document.sql | 37 ++++---- .../stg_rag_hubspot__company.sql | 88 ++++++------------- .../stg_rag_hubspot__company_fields.sql | 30 +++++++ .../stg_rag_hubspot__owner.sql | 2 +- 6 files changed, 84 insertions(+), 78 deletions(-) create mode 100644 models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql diff --git a/dbt_project.yml b/dbt_project.yml index 9d16e0f..7509c0f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -35,7 +35,7 @@ models: unified_rag: +schema: unified_rag intermediate: - +materialized: ephemeral + +materialized: view unstructured: +materialized: view staging: diff --git a/macros/staging/hubspot/get_hubspot_company_columns.sql b/macros/staging/hubspot/get_hubspot_company_columns.sql index 9966be6..211e32e 100644 --- a/macros/staging/hubspot/get_hubspot_company_columns.sql +++ b/macros/staging/hubspot/get_hubspot_company_columns.sql @@ -13,7 +13,8 @@ {"name": "property_city", "datatype": dbt.type_string(), "alias": "city"}, {"name": "property_state", "datatype": dbt.type_string(), "alias": "state"}, {"name": "property_country", "datatype": dbt.type_string(), "alias": "country"}, - {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"} + {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"}, + {"name": "portal_id", "datatype": dbt.type_int()} ] %} {{ return(columns) }} diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index 5ea18e2..fc01132 100644 --- 
a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -2,7 +2,7 @@ WITH owners AS ( SELECT *, COALESCE( - email, + owner_email, 'UNKNOWN' ) AS safe_email, COALESCE( @@ -19,7 +19,7 @@ WITH owners AS ( deals AS ( SELECT *, - COALESCE({{ cast('property_closedate', dbt.type_string()) }}, 'not closed yet') AS safe_close_date + COALESCE({{ cast('closed_date', dbt.type_string()) }}, 'not closed yet') AS safe_close_date FROM {{ ref('stg_rag_hubspot__deal') }} ), @@ -37,37 +37,42 @@ deal_company AS ( ), deal_descriptions AS ( SELECT - DISTINCT deal.deal_id, - {{ dbt.concat([ "'- {'", "'deal_name: '", "deals.property_dealname", "', '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "', '", "'deal_owner_email: '", "owners.safe_email", "', '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, - deal.property_closedate + DISTINCT deals.deal_id, + deals.source_relation, + {{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, + deals.closed_date FROM deals JOIN owners - ON owners.owner_id = deal.owner_id + ON owners.owner_id = deals.owner_id + AND owners.source_relation = deals.source_relation ), company_with_deal_description AS ( SELECT - id, + company.company_id AS company_id, + company.source_relation AS source_relation, {{ dbt.listagg( measure = "dd.deal_description", delimiter_text = "'\\n'", - order_by_clause = "order by dd.property_closedate" + order_by_clause = "order by dd.closed_date" ) }} AS deal_descriptions FROM company - JOIN deal_company dc - ON dc.company_id = company.id - JOIN deal_descriptions dd + LEFT JOIN deal_company dc + ON dc.company_id = company.company_id + AND 
dc.source_relation = company.source_relation + LEFT JOIN deal_descriptions dd ON dd.deal_id = dc.deal_id + AND dc.source_relation = dd.source_relation GROUP BY - 1 + 1, + 2 ) SELECT - cdd.deal_descriptions, + cdd.deal_descriptions AS deals, company.* FROM company JOIN company_with_deal_description cdd - ON cdd.id = company.id -WHERE - NOT company._fivetran_deleted + ON cdd.company_id = company.company_id + AND cdd.source_relation = company.source_relation diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__company.sql index a1b3970..ca37f04 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__company.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__company.sql @@ -1,59 +1,29 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} - -with base as ( - - {{ - fivetran_utils.union_data( - table_identifier='company', - database_variable='rag_hubspot_database', - schema_variable='rag_hubspot_schema', - default_database=target.database, - default_schema='rag_hubspot', - default_variable='hubspot_company', - union_schema_variable='rag_hubspot_union_schemas', - union_database_variable='rag_hubspot_union_databases' - ) - }} -), - -fields as ( - - select - {{ - fivetran_utils.fill_staging_columns( - source_columns=adapter.get_columns_in_relation(source('rag_hubspot','company')), - staging_columns=get_hubspot_company_columns() - ) - }} - - {{ fivetran_utils.source_relation( - union_schema_variable='rag_hubspot_union_schemas', - union_database_variable='rag_hubspot_union_databases') - }} - from base -), - -final as ( - - select - company_id, - source_relation, - is_company_deleted, - cast(_fivetran_synced as {{ dbt.type_timestamp() }}) as _fivetran_synced, - company_name, - description, - created_date, - industry, - street_address, - street_address_2, - city, - state, - country, - company_annual_revenue - - from fields - -) - -select * -from final \ No newline at end of file +{{ 
config(enabled = var('rag__using_hubspot', True)) }} + +WITH FINAL AS ( + + SELECT + {{ dbt_utils.star( + from = ref('stg_rag_hubspot__company_fields'), + except = ['id', '_fivetran_synced', 'is_deleted', 'property_name', 'property_description', 'property_createdate', 'property_industry', 'property_address', 'property_address_2', 'property_city', 'property_state', 'property_country', 'property_annualrevenue' ] + ) }}, + id AS company_id, + CAST(_fivetran_synced AS {{ dbt.type_timestamp() }}) AS _fivetran_synced, + is_deleted AS is_company_deleted, + property_name AS company_name, + property_description AS description, + property_createdate AS created_date, + property_industry AS industry, + property_address AS street_address, + property_address_2 AS street_address_2, + property_city AS city, + property_state AS state, + property_country AS country, + property_annualrevenue AS company_annual_revenue + FROM + {{ ref('stg_rag_hubspot__company_fields') }} +) +SELECT + * +FROM + FINAL diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql b/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql new file mode 100644 index 0000000..a2632cc --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql @@ -0,0 +1,30 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='company', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_company', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + * + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +) + +select * from fields \ No newline at end of file diff 
--git a/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql b/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql index fa06618..62a5175 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__owner.sql @@ -21,7 +21,7 @@ fields as ( select {{ fivetran_utils.fill_staging_columns( - source_columns=adapter.get_columns_in_relation(source('rag_hubspot','contact')), + source_columns=adapter.get_columns_in_relation(source('rag_hubspot','owner')), staging_columns=get_hubspot_owner_columns() ) }} From cd24a991c12ac14a63dd8e67d83ffeeb4933a730 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Tue, 18 Feb 2025 10:02:18 -0800 Subject: [PATCH 03/26] remove unnecessary changes --- dbt_project.yml | 2 +- macros/staging/hubspot/get_hubspot_company_columns.sql | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index 7509c0f..9d16e0f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -35,7 +35,7 @@ models: unified_rag: +schema: unified_rag intermediate: - +materialized: view + +materialized: ephemeral unstructured: +materialized: view staging: diff --git a/macros/staging/hubspot/get_hubspot_company_columns.sql b/macros/staging/hubspot/get_hubspot_company_columns.sql index 211e32e..9966be6 100644 --- a/macros/staging/hubspot/get_hubspot_company_columns.sql +++ b/macros/staging/hubspot/get_hubspot_company_columns.sql @@ -13,8 +13,7 @@ {"name": "property_city", "datatype": dbt.type_string(), "alias": "city"}, {"name": "property_state", "datatype": dbt.type_string(), "alias": "state"}, {"name": "property_country", "datatype": dbt.type_string(), "alias": "country"}, - {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"}, - {"name": "portal_id", "datatype": dbt.type_int()} + {"name": "property_annualrevenue", "datatype": dbt.type_int(), "alias": "company_annual_revenue"} ] %} {{ return(columns) }} From 
3686c1d7f0030d24b7adb451e2cd094667bdc4d2 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Tue, 18 Feb 2025 11:46:15 -0800 Subject: [PATCH 04/26] make it a left join for Deal --- models/unstructured/rag_hubspot__document.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index ece1210..c7349ed 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -26,10 +26,10 @@ final as ( {{ dbt.concat([ "deal_document.comment_markdown", "'\\n\\n## COMMENTS\\n\\n'", - "grouped.comments_group_markdown"]) }} + "coalesce(grouped.comments_group_markdown, '')"]) }} as chunk from deal_document - join grouped + left join grouped on grouped.deal_id = deal_document.deal_id and grouped.source_relation = deal_document.source_relation ) From 17e782bed721821cf2d8366e5ac0cc2f2aed3099 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Wed, 19 Feb 2025 11:32:32 -0800 Subject: [PATCH 05/26] fix bug and add owners to Deal --- .../hubspot/int_rag_hubspot__deal_document.sql | 17 ++++++++++++++--- models/unstructured/rag_hubspot__document.sql | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index e9927aa..4c3445f 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -41,6 +41,11 @@ engagement_deals as ( from {{ ref('stg_rag_hubspot__engagement_deal') }} ), +owners AS ( + select * + from {{ ref('stg_rag_hubspot__owner') }} +), + engagement_detail_prep as ( select @@ -52,7 +57,8 @@ engagement_detail_prep as ( {{ unified_rag.coalesce_cast(["contacts.contact_name", "'UNKNOWN'"], dbt.type_string()) }} as contact_name, {{ unified_rag.coalesce_cast(["contacts.email", "'UNKNOWN'"], 
dbt.type_string()) }} as created_by, {{ unified_rag.coalesce_cast(["companies.company_name", "'UNKNOWN'"], dbt.type_string()) }} as company_name, - {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on + {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on, + {{ dbt.concat(["coalesce(owners.first_name, '')", "' '", "coalesce(owners.last_name, '')", "' ('", "coalesce(owners.owner_email, '')", "')'"]) }} AS owner_details from deals left join engagement_deals on deals.deal_id = engagement_deals.deal_id @@ -72,6 +78,9 @@ engagement_detail_prep as ( left join companies on engagement_companies.company_id = companies.company_id and engagement_companies.source_relation = companies.source_relation + left join owners + on deals.owner_id = owners.owner_id + and deals.source_relation = owners.source_relation ), engagement_details as ( @@ -84,7 +93,8 @@ engagement_details as ( {{ fivetran_utils.string_agg(field_to_agg="distinct engagement_type", delimiter="', '") }} as engagement_type, {{ fivetran_utils.string_agg(field_to_agg="distinct contact_name", delimiter="', '") }} as contact_name, {{ fivetran_utils.string_agg(field_to_agg="distinct created_by", delimiter="', '") }} as created_by, - {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name + {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name, + {{ fivetran_utils.string_agg(field_to_agg="distinct owner_details", delimiter="', '") }} as owner_details from engagement_detail_prep group by 1,2,3,4,5 ), @@ -101,7 +111,8 @@ engagement_markdown as ( "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'", "'Created On : '", "created_on", "'\\n'", "'Company Name: '", "company_name", "'\\n'", - "'Engagement Type: '", "engagement_type", "'\\n'" + "'Engagement Type: '", "engagement_type", "'\\n'", + 
"'Deal Owner: '", "owner_details", "'\\n'" ]) }} as {{ dbt.type_string() }}) as comment_markdown from engagement_details ), diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index c7349ed..dd23856 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -21,7 +21,7 @@ final as ( 'hubspot' as platform, deal_document.source_relation, grouped.most_recent_chunk_update, - grouped.chunk_index, + coalesce(grouped.chunk_index, 0) as chunk_index, grouped.chunk_tokens as chunk_tokens_approximate, {{ dbt.concat([ "deal_document.comment_markdown", From ad595234aecbe86c8539d13f9543f5396fdac81c Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Sat, 22 Feb 2025 13:21:51 -0800 Subject: [PATCH 06/26] enable on flag --- .../intermediate/hubspot/int_rag_hubspot__company_document.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index fc01132..ea28e0a 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -1,3 +1,5 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + WITH owners AS ( SELECT *, From 886e9a1533da837a476bdebdd8885316af916432 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Wed, 5 Mar 2025 14:13:59 -0800 Subject: [PATCH 07/26] add company ids to deal documents --- models/intermediate/hubspot/int_rag_hubspot__deal_document.sql | 3 +++ models/unstructured/rag_hubspot__document.sql | 1 + 2 files changed, 4 insertions(+) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index 4c3445f..a26ad97 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ 
b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -56,6 +56,7 @@ engagement_detail_prep as ( deals.source_relation, {{ unified_rag.coalesce_cast(["contacts.contact_name", "'UNKNOWN'"], dbt.type_string()) }} as contact_name, {{ unified_rag.coalesce_cast(["contacts.email", "'UNKNOWN'"], dbt.type_string()) }} as created_by, + {{ unified_rag.coalesce_cast(["companies.company_id", "'UNKNOWN'"], dbt.type_string()) }} as company_id, {{ unified_rag.coalesce_cast(["companies.company_name", "'UNKNOWN'"], dbt.type_string()) }} as company_name, {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on, {{ dbt.concat(["coalesce(owners.first_name, '')", "' '", "coalesce(owners.last_name, '')", "' ('", "coalesce(owners.owner_email, '')", "')'"]) }} AS owner_details @@ -94,6 +95,7 @@ engagement_details as ( {{ fivetran_utils.string_agg(field_to_agg="distinct contact_name", delimiter="', '") }} as contact_name, {{ fivetran_utils.string_agg(field_to_agg="distinct created_by", delimiter="', '") }} as created_by, {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name, + {{ fivetran_utils.string_agg(field_to_agg="distinct company_id", delimiter="', '") }} as company_ids, {{ fivetran_utils.string_agg(field_to_agg="distinct owner_details", delimiter="', '") }} as owner_details from engagement_detail_prep group by 1,2,3,4,5 @@ -106,6 +108,7 @@ engagement_markdown as ( title, source_relation, url_reference, + company_ids, cast( {{ dbt.concat([ "'Deal Name : '", "title", "'\\n\\n'", "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'", diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index dd23856..af3e8a2 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -18,6 +18,7 @@ final as ( cast(deal_document.deal_id as {{ dbt.type_string() }}) 
as document_id, coalesce(deal_document.title, grouped.title) as title, deal_document.url_reference, + deal_document.company_ids, 'hubspot' as platform, deal_document.source_relation, grouped.most_recent_chunk_update, From 4cbff831c044a80676767e862f00278eaeff5af2 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:11:45 -0800 Subject: [PATCH 08/26] more iteration --- macros/utility/create_json.sql | 26 +++++++++++++++++++ .../int_rag_hubspot__company_document.sql | 15 ++++++----- 2 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 macros/utility/create_json.sql diff --git a/macros/utility/create_json.sql b/macros/utility/create_json.sql new file mode 100644 index 0000000..b4e9cff --- /dev/null +++ b/macros/utility/create_json.sql @@ -0,0 +1,26 @@ +{% macro create_json(columns) %} + {%- if not execute -%} + {%- set json_function = { + 'bigquery': 'TO_JSON_STRING', + 'snowflake': 'OBJECT_CONSTRUCT', + 'redshift': 'json_build_object', + 'databricks': 'to_json' + }[target.type] -%} + {%- set json_expression = json_function + '(' -%} + {%- for column in columns -%} + {%- set json_expression = json_expression + "'" + column + "', " + column -%} + {%- if not loop.last -%} + {%- set json_expression = json_expression + ', ' -%} + {%- endif -%} + {%- endfor -%} + {%- set json_expression = json_expression + ')' -%} + + {%- if target.type == 'snowflake' -%} + CAST({{ json_expression }} AS STRING) + {%- elif target.type == 'redshift' -%} + {{ json_expression }}::VARCHAR + {%- else -%} + {{ json_expression }} + {%- endif -%} + {%- endif -%} +{% endmacro %} diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index ea28e0a..95e6be0 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -39,15 +39,16 @@ deal_company AS ( ), deal_descriptions AS ( SELECT - 
DISTINCT deals.deal_id, - deals.source_relation, - {{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, - deals.closed_date + DISTINCT deal_id, + source_relation, + safe_close_date AS closed_date, + --{{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, + {{ create_json(['deal_id', 'title', 'safe_close_date']) }} AS deal_description FROM deals - JOIN owners - ON owners.owner_id = deals.owner_id - AND owners.source_relation = deals.source_relation + --JOIN owners + --ON owners.owner_id = deals.owner_id + --AND owners.source_relation = deals.source_relation ), company_with_deal_description AS ( SELECT From 6514a72c12a360136112fc52096cdd167c64ce05 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:19:08 -0800 Subject: [PATCH 09/26] fix json macro bug --- macros/utility/create_json.sql | 42 ++++++++++++++++------------------ 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/macros/utility/create_json.sql b/macros/utility/create_json.sql index b4e9cff..6801b04 100644 --- a/macros/utility/create_json.sql +++ b/macros/utility/create_json.sql @@ -1,26 +1,24 @@ {% macro create_json(columns) %} - {%- if not execute -%} - {%- set json_function = { - 'bigquery': 'TO_JSON_STRING', - 'snowflake': 'OBJECT_CONSTRUCT', - 'redshift': 'json_build_object', - 'databricks': 'to_json' - }[target.type] -%} - {%- set json_expression = json_function + '(' -%} - {%- for column in columns -%} - {%- set json_expression = json_expression + "'" + column + "', " + column -%} - {%- if not 
loop.last -%} - {%- set json_expression = json_expression + ', ' -%} - {%- endif -%} - {%- endfor -%} - {%- set json_expression = json_expression + ')' -%} - - {%- if target.type == 'snowflake' -%} - CAST({{ json_expression }} AS STRING) - {%- elif target.type == 'redshift' -%} - {{ json_expression }}::VARCHAR - {%- else -%} - {{ json_expression }} + {%- set json_function = { + 'bigquery': 'TO_JSON_STRING', + 'snowflake': 'OBJECT_CONSTRUCT', + 'redshift': 'json_build_object', + 'databricks': 'to_json' + }[target.type] -%} + {%- set json_expression = json_function + '(' -%} + {%- for column in columns -%} + {%- set json_expression = json_expression + "'" + column + "', " + column -%} + {%- if not loop.last -%} + {%- set json_expression = json_expression + ', ' -%} {%- endif -%} + {%- endfor -%} + {%- set json_expression = json_expression + ')' -%} + + {%- if target.type == 'snowflake' -%} + CAST({{ json_expression }} AS STRING) + {%- elif target.type == 'redshift' -%} + {{ json_expression }}::VARCHAR + {%- else -%} + {{ json_expression }} {%- endif -%} {% endmacro %} From ab87a84c16831fcc74ab7c744a7e7afcd7dc6ede Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:39:00 -0800 Subject: [PATCH 10/26] concat into json list --- .../hubspot/int_rag_hubspot__company_document.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index 95e6be0..45495b4 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -54,11 +54,11 @@ company_with_deal_description AS ( SELECT company.company_id AS company_id, company.source_relation AS source_relation, - {{ dbt.listagg( - measure = "dd.deal_description", - delimiter_text = "'\\n'", - order_by_clause = "order by dd.closed_date" - ) }} AS deal_descriptions + {{ 
dbt.concat([ + "'['", + dbt.listagg("dd.deal_description", "','", "order by dd.closed_date"), + "']'" + ]) }} AS deal_descriptions FROM company LEFT JOIN deal_company dc From 121163de82d70fe1b08e9d40ce9f22cd4d811a6d Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:50:12 -0800 Subject: [PATCH 11/26] try to fix json macro --- macros/utility/create_json.sql | 62 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/macros/utility/create_json.sql b/macros/utility/create_json.sql index 6801b04..b76218c 100644 --- a/macros/utility/create_json.sql +++ b/macros/utility/create_json.sql @@ -1,24 +1,38 @@ -{% macro create_json(columns) %} - {%- set json_function = { - 'bigquery': 'TO_JSON_STRING', - 'snowflake': 'OBJECT_CONSTRUCT', - 'redshift': 'json_build_object', - 'databricks': 'to_json' - }[target.type] -%} - {%- set json_expression = json_function + '(' -%} - {%- for column in columns -%} - {%- set json_expression = json_expression + "'" + column + "', " + column -%} - {%- if not loop.last -%} - {%- set json_expression = json_expression + ', ' -%} - {%- endif -%} - {%- endfor -%} - {%- set json_expression = json_expression + ')' -%} - - {%- if target.type == 'snowflake' -%} - CAST({{ json_expression }} AS STRING) - {%- elif target.type == 'redshift' -%} - {{ json_expression }}::VARCHAR - {%- else -%} - {{ json_expression }} - {%- endif -%} -{% endmacro %} +{% macro create_json(columns) -%} + {% if target.type == 'bigquery' -%} + TO_JSON_STRING( + STRUCT( + {%- for column in columns -%} + {{ column }} AS {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + ) + {% elif target.type == 'snowflake' -%} + CAST( + OBJECT_CONSTRUCT( + {%- for column in columns -%} + '{{ column }}', {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + AS STRING + ) + {% elif target.type == 'redshift' -%} + json_build_object( + {%- for column in columns -%} + '{{ column }}', {{ column 
}} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + )::VARCHAR + {% elif target.type == 'databricks' -%} + to_json( + named_struct( + {%- for column in columns -%} + '{{ column }}', {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + ) + {% endif -%} +{% endmacro -%} From 4128792724f0cdbedddc0b7d5212449d2b1cf79f Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 13:53:52 -0800 Subject: [PATCH 12/26] remove comment --- .../hubspot/int_rag_hubspot__company_document.sql | 4 ---- 1 file changed, 4 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index 45495b4..3bf68ea 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -42,13 +42,9 @@ deal_descriptions AS ( DISTINCT deal_id, source_relation, safe_close_date AS closed_date, - --{{ dbt.concat([ "' - {'", "'deal_name: '", "deals.title", "' // '", "'deal_owner_name: '", "owners.safe_first_name", "' '", "owners.safe_last_name", "' // '", "'deal_owner_email: '", "owners.safe_email", "' // '", "'deal_closed_date: '", "deals.safe_close_date", "'}'" ]) }} AS deal_description, {{ create_json(['deal_id', 'title', 'safe_close_date']) }} AS deal_description FROM deals - --JOIN owners - --ON owners.owner_id = deals.owner_id - --AND owners.source_relation = deals.source_relation ), company_with_deal_description AS ( SELECT From 919f7a1d42eef9438c77e66880489e71f9ad91f7 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 14:34:07 -0800 Subject: [PATCH 13/26] add companies to deals --- .../int_rag_hubspot__deal_document.sql | 28 +++++++++++++++---- models/unstructured/rag_hubspot__document.sql | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql 
b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index a26ad97..15f7bd9 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -13,11 +13,19 @@ contacts as ( ), companies as ( - - select * + select + *, + {{ create_json(['company_id']) }} AS company_desc from {{ ref('stg_rag_hubspot__company') }} ), +deal_company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__deal_company') }} +), + engagements as ( select * from {{ ref('stg_rag_hubspot__engagement') }} @@ -56,7 +64,6 @@ engagement_detail_prep as ( deals.source_relation, {{ unified_rag.coalesce_cast(["contacts.contact_name", "'UNKNOWN'"], dbt.type_string()) }} as contact_name, {{ unified_rag.coalesce_cast(["contacts.email", "'UNKNOWN'"], dbt.type_string()) }} as created_by, - {{ unified_rag.coalesce_cast(["companies.company_id", "'UNKNOWN'"], dbt.type_string()) }} as company_id, {{ unified_rag.coalesce_cast(["companies.company_name", "'UNKNOWN'"], dbt.type_string()) }} as company_name, {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on, {{ dbt.concat(["coalesce(owners.first_name, '')", "' '", "coalesce(owners.last_name, '')", "' ('", "coalesce(owners.owner_email, '')", "')'"]) }} AS owner_details @@ -95,7 +102,6 @@ engagement_details as ( {{ fivetran_utils.string_agg(field_to_agg="distinct contact_name", delimiter="', '") }} as contact_name, {{ fivetran_utils.string_agg(field_to_agg="distinct created_by", delimiter="', '") }} as created_by, {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name, - {{ fivetran_utils.string_agg(field_to_agg="distinct company_id", delimiter="', '") }} as company_ids, {{ fivetran_utils.string_agg(field_to_agg="distinct owner_details", delimiter="', '") }} as owner_details from engagement_detail_prep group by 1,2,3,4,5 @@ -108,7 +114,11 @@ engagement_markdown as 
( title, source_relation, url_reference, - company_ids, + {{ dbt.concat([ + "'['", + dbt.listagg("cc.company_desc", "','"), + "']'" + ]) }} AS companies, cast( {{ dbt.concat([ "'Deal Name : '", "title", "'\\n\\n'", "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'", @@ -117,7 +127,13 @@ engagement_markdown as ( "'Engagement Type: '", "engagement_type", "'\\n'", "'Deal Owner: '", "owner_details", "'\\n'" ]) }} as {{ dbt.type_string() }}) as comment_markdown - from engagement_details + from engagement_details ed + left join deal_company dc + on dc.deal_id = ed.deal_id + and dc.source_relation = ed.source_relation + left join companies cc + on dc.company_id = cc.company_id + and dc.source_relation = cc.source_relation ), engagement_tokens as ( diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index af3e8a2..d246927 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -18,7 +18,7 @@ final as ( cast(deal_document.deal_id as {{ dbt.type_string() }}) as document_id, coalesce(deal_document.title, grouped.title) as title, deal_document.url_reference, - deal_document.company_ids, + deal_document.companies, 'hubspot' as platform, deal_document.source_relation, grouped.most_recent_chunk_update, From 999f9c0da9f7245ba451ef05d45737f4302f5bce Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 14:45:26 -0800 Subject: [PATCH 14/26] add company name --- models/intermediate/hubspot/int_rag_hubspot__deal_document.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index 15f7bd9..6ed89b0 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -15,7 +15,7 @@ contacts as ( companies as ( select *, - {{ 
create_json(['company_id']) }} AS company_desc + {{ create_json(['company_id', 'company_name']) }} AS company_desc from {{ ref('stg_rag_hubspot__company') }} ), From 400f24daf467c802608cdc66c663eef96f19f33a Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 6 Mar 2025 14:48:19 -0800 Subject: [PATCH 15/26] fix query --- .../int_rag_hubspot__deal_document.sql | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index 6ed89b0..dccbe08 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -110,23 +110,23 @@ engagement_details as ( engagement_markdown as ( select - deal_id, - title, - source_relation, - url_reference, - {{ dbt.concat([ - "'['", - dbt.listagg("cc.company_desc", "','"), - "']'" - ]) }} AS companies, + ed.deal_id, + ed.title, + ed.source_relation, + ed.url_reference, cast( {{ dbt.concat([ "'Deal Name : '", "title", "'\\n\\n'", "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'", "'Created On : '", "created_on", "'\\n'", - "'Company Name: '", "company_name", "'\\n'", + "'Company Name: '", "ed.company_name", "'\\n'", "'Engagement Type: '", "engagement_type", "'\\n'", "'Deal Owner: '", "owner_details", "'\\n'" - ]) }} as {{ dbt.type_string() }}) as comment_markdown + ]) }} as {{ dbt.type_string() }}) as comment_markdown, + {{ dbt.concat([ + "'['", + dbt.listagg("cc.company_desc", "','"), + "']'" + ]) }} AS companies from engagement_details ed left join deal_company dc on dc.deal_id = ed.deal_id @@ -134,6 +134,7 @@ engagement_markdown as ( left join companies cc on dc.company_id = cc.company_id and dc.source_relation = cc.source_relation + group by 1,2,3,4,5 ), engagement_tokens as ( From fe957adfbe3f31b288dee60eb3ba8cc9f5bba50f Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Tue, 11 Mar 
2025 12:30:45 -0700 Subject: [PATCH 16/26] Add team to hubspot model --- .../hubspot_staging/src_rag_hubspot.yml | 17 +++++- .../hubspot_staging/stg_rag_hubspot.yml | 13 +++++ .../hubspot_staging/stg_rag_hubspot__team.sql | 53 +++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 models/staging/hubspot_staging/stg_rag_hubspot__team.sql diff --git a/models/staging/hubspot_staging/src_rag_hubspot.yml b/models/staging/hubspot_staging/src_rag_hubspot.yml index 9d00152..f498a12 100644 --- a/models/staging/hubspot_staging/src_rag_hubspot.yml +++ b/models/staging/hubspot_staging/src_rag_hubspot.yml @@ -281,4 +281,19 @@ sources: - name: deal_id description: The ID of the related contact. - name: company - description: The ID of the related company. \ No newline at end of file + description: The ID of the related company. + + - name: team + identifier: "{{ var('rag_hubspot_deal_company_identifier', 'team')}}" + description: Each record represents a team. + config: + enabled: "{{ var('rag_hubspot_team_enabled', true) }}" + columns: + - name: _fivetran_synced + description: '{{ doc("_fivetran_synced") }}' + - name: _fivetran_deleted + description: '{{ doc("_fivetran_deleted") }}' + - name: id + description: The ID of the team. + - name: name + description: The name of the team. \ No newline at end of file diff --git a/models/staging/hubspot_staging/stg_rag_hubspot.yml b/models/staging/hubspot_staging/stg_rag_hubspot.yml index ec0d765..ac1eee2 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot.yml +++ b/models/staging/hubspot_staging/stg_rag_hubspot.yml @@ -279,3 +279,16 @@ models: - name: owner_email description: The email address of the owner. + - name: stg_rag_hubspot__team + description: Each record represents a team in Hubspot. + columns: + - name: _fivetran_synced + description: '{{ doc("_fivetran_synced") }}' + - name: _fivetran_deleted + description: Boolean to mark rows that were deleted in the source database. 
+ - name: id + description: The id of the team. + - name: name + description: Name of the team. + - name: source_relation + description: The source of the record if the unioning functionality is being used. If it is not this field will be empty. \ No newline at end of file diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__team.sql b/models/staging/hubspot_staging/stg_rag_hubspot__team.sql new file mode 100644 index 0000000..b490611 --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__team.sql @@ -0,0 +1,53 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='team', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_team', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + {{ + fivetran_utils.fill_staging_columns( + source_columns=adapter.get_columns_in_relation(source('rag_hubspot','team')), + staging_columns= [ + {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, + {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, + {"name": "id", "datatype": dbt.type_int()}, + {"name": "name", "datatype": dbt.type_string()} + ] + ) + }} + + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +), + +final as ( + + select + id, + name, + source_relation, + _fivetran_deleted + cast(_fivetran_synced as {{ dbt.type_timestamp() }}) as _fivetran_synced, + from fields +) + +select * +from final \ No newline at end of file From 2c7d8cb72163e2b0455dad1ff51f6cd8a27caecd Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Tue, 11 Mar 2025 12:45:25 -0700 Subject: [PATCH 17/26] Update stg_rag_hubspot__team.sql --- 
.../hubspot_staging/stg_rag_hubspot__team.sql | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__team.sql b/models/staging/hubspot_staging/stg_rag_hubspot__team.sql index b490611..499bc5a 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__team.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__team.sql @@ -1,4 +1,11 @@ {{ config(enabled=var('rag__using_hubspot', True)) }} +{% set hubspot_team_columns = [ + {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, + {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, + {"name": "id", "datatype": dbt.type_int()}, + {"name": "name", "datatype": dbt.type_string()} +] %} + with base as ( @@ -22,12 +29,7 @@ fields as ( {{ fivetran_utils.fill_staging_columns( source_columns=adapter.get_columns_in_relation(source('rag_hubspot','team')), - staging_columns= [ - {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, - {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, - {"name": "id", "datatype": dbt.type_int()}, - {"name": "name", "datatype": dbt.type_string()} - ] + staging_columns= hubspot_team_columns ) }} @@ -44,7 +46,7 @@ final as ( id, name, source_relation, - _fivetran_deleted - cast(_fivetran_synced as {{ dbt.type_timestamp() }}) as _fivetran_synced, + _fivetran_deleted, + cast(_fivetran_synced as {{ dbt.type_timestamp() }}) as _fivetran_synced from fields ) From 2cecd0220f01a1ad0d93526a915631c95999e2d9 Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Mon, 31 Mar 2025 18:26:05 -0700 Subject: [PATCH 18/26] Add Issue Key to Jira model --- models/intermediate/jira/int_rag_jira__issue_document.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/intermediate/jira/int_rag_jira__issue_document.sql b/models/intermediate/jira/int_rag_jira__issue_document.sql index a5c8e83..5373adc 100644 --- a/models/intermediate/jira/int_rag_jira__issue_document.sql +++ b/models/intermediate/jira/int_rag_jira__issue_document.sql @@ -34,6 +34,7 @@
issue_details as ( select issues.issue_id, + issues.issue_key, {{ unified_rag.coalesce_cast(["issues.title", "'UNKNOWN'"], dbt.type_string()) }} as title, {% if var('jira_subdomain', default=None) %} {{ dbt.concat(["'https://'", "jira_subdomain_value", "'.atlassian.net/browse/'", "issues.issue_key"]) }} as url_reference, @@ -70,7 +71,7 @@ final as ( source_relation, url_reference, {{ dbt.concat([ - "'# issue : '", "title", "'\\n\\n'", + "'# issue ('", "issue_key", "') : '", "title", "'\\n\\n'", "'Created By : '", "user_name", "' ('", "created_by", "')\\n'", "'Created On : '", "created_on", "'\\n'", "'Status : '", "status", "'\\n'", From ab53caca8f03370c73e23b08d54c3d0dd01da06c Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 10 Apr 2025 15:08:13 -0700 Subject: [PATCH 19/26] add engagement model --- .../hubspot/int_rag_hubspot__engagement.sql | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 models/intermediate/hubspot/int_rag_hubspot__engagement.sql diff --git a/models/intermediate/hubspot/int_rag_hubspot__engagement.sql b/models/intermediate/hubspot/int_rag_hubspot__engagement.sql new file mode 100644 index 0000000..5e7b8ba --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__engagement.sql @@ -0,0 +1,120 @@ +WITH engagement_emails AS ( + SELECT + engagement_email.engagement_id, + engagement_email.source_relation, + engagement_email.engagement_type, + engagement_email.created_timestamp, + engagement_email.occurred_timestamp, + engagement_email.owner_id, + engagement_email.team_id, + engagement_email.body, + engagement_email.title, + engagement_email.email_to_email, + engagement_email.email_cc_email, + engagement_email.email_from_email AS commenter_email, + {{ fivetran_utils.string_agg( + field_to_agg = "contacts.contact_name", + delimiter = "','" + ) }} AS commenter_name + FROM + {{ ref('stg_rag_hubspot__engagement_email') }} + engagement_email + LEFT JOIN engagement_contacts + ON engagement_email.engagement_id = 
engagement_contacts.engagement_id + AND engagement_email.source_relation = engagement_contacts.source_relation + LEFT JOIN contacts + ON engagement_contacts.contact_id = contacts.contact_id + AND engagement_contacts.source_relation = contacts.source_relation {{ dbt_utils.group_by(12) }} +), +engagement_notes AS ( + SELECT + engagement_note.engagement_id, + engagement_note.source_relation, + engagement_note.engagement_type, + engagement_note.created_timestamp, + engagement_note.occurred_timestamp, + engagement_note.owner_id, + engagement_note.team_id, + engagement_note.title, + engagement_note.body, + owners.owner_name, + owners.owner_email + FROM + {{ ref('stg_rag_hubspot__engagement_note') }} + engagement_note + LEFT JOIN owners + ON engagement_note.owner_id = owners.owner_id + AND engagement_note.source_relation = owners.source_relation +), +email_comment_details AS ( + SELECT + {{ unified_rag.coalesce_cast( + ["engagement_emails.commenter_email", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_email, + {{ unified_rag.coalesce_cast( + ["engagement_emails.commenter_name", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_name, + {{ unified_rag.coalesce_cast( + ["engagement_emails.title", "'UNKNOWN'"], + dbt.type_string() + ) }} AS email_title, + {{ unified_rag.coalesce_cast( + ["engagement_emails.created_timestamp", "'1970-01-01 00:00:00'"], + dbt.type_timestamp() + ) }} AS comment_time, + {{ unified_rag.coalesce_cast( + ["engagement_emails.body", "'UNKNOWN'"], + dbt.type_string() + ) }} AS comment_body + FROM + engagement_emails + WHERE + LOWER( + engagement_emails.engagement_type + ) = 'email' +), +note_comment_details AS ( + SELECT + {{ unified_rag.coalesce_cast( + ["engagement_notes.owner_email", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_email, + {{ unified_rag.coalesce_cast( + ["engagement_notes.owner_name", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_name, + engagement_notes.title AS engagement_note_title, + {{ 
unified_rag.coalesce_cast( + ["engagement_notes.created_timestamp", "'1970-01-01 00:00:00'"], + dbt.type_timestamp() + ) }} AS comment_time, + {{ unified_rag.coalesce_cast( + ["engagement_notes.body", "'UNKNOWN'"], + dbt.type_string() + ) }} AS comment_body + FROM + engagement_notes + WHERE + LOWER( + engagement_deals.engagement_type + ) = 'note' +), +SELECT + source_relation, + comment_time, + CAST( + {{ dbt.concat([ "'Email subject: '", "email_title", "'\\n'", "'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'", "'##### sent @ '", "comment_time", "'\\n'", "comment_body" ]) }} AS {{ dbt.type_string() }} + ) AS comment_markdown +FROM + email_comment_details +UNION ALL +SELECT + source_relation, + comment_time, + CAST( + {{ dbt.concat([ "'Engagement type: Note'", "'\\n'", "'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'", "'##### sent @ '", "comment_time", "'\\n'", "comment_body" ]) }} AS {{ dbt.type_string() }} + ) AS comment_markdown +FROM + note_comment_details From 7771b7e034fd4423fdcb31d58a841d9360f21dc3 Mon Sep 17 00:00:00 2001 From: levonkorganyan Date: Thu, 10 Apr 2025 21:42:34 -0700 Subject: [PATCH 20/26] add(hubspot): Ticket object --- dbt_project.yml | 4 + .../hubspot/int_rag_hubspot__engagement.sql | 38 +++-- ...t_rag_hubspot__ticket_comment_document.sql | 138 ++++++++++++++++++ .../int_rag_hubspot__ticket_document.sql | 60 ++++++++ .../hubspot_staging/src_rag_hubspot.yml | 37 +++-- .../stg_rag_hubspot__ticket.sql | 34 +++++ .../stg_rag_hubspot__ticket_company.sql | 37 +++++ .../stg_rag_hubspot__ticket_engagement.sql | 38 +++++ 8 files changed, 365 insertions(+), 21 deletions(-) create mode 100644 models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql create mode 100644 models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql create mode 100644 models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql create mode 100644 
models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql create mode 100644 models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql diff --git a/dbt_project.yml b/dbt_project.yml index 9d16e0f..d7d58f7 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -29,6 +29,10 @@ vars: hubspot_contact: "{{ source('rag_hubspot', 'contact') }}" hubspot_deal: "{{ source('rag_hubspot', 'deal') }}" hubspot_owner: "{{ source('rag_hubspot', 'owner') }}" + hubspot_ticket: "{{ source('rag_hubspot', 'ticket') }}" + hubspot_ticket_company: "{{ source('rag_hubspot', 'ticket_company') }}" + hubspot_ticket_engagement: "{{ source('rag_hubspot', 'ticket_engagement') }}" + models: diff --git a/models/intermediate/hubspot/int_rag_hubspot__engagement.sql b/models/intermediate/hubspot/int_rag_hubspot__engagement.sql index 5e7b8ba..1038860 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__engagement.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__engagement.sql @@ -1,4 +1,24 @@ -WITH engagement_emails AS ( +{{ config(enabled=var('rag__using_hubspot', True)) }} + +WITH contacts AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__contact') }} +), +engagement_contacts AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__engagement_contact') }} +), +owners AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__owner') }} +), +engagement_emails AS ( SELECT engagement_email.engagement_id, engagement_email.source_relation, @@ -48,6 +68,8 @@ engagement_notes AS ( ), email_comment_details AS ( SELECT + source_relation, + engagement_id, {{ unified_rag.coalesce_cast( ["engagement_emails.commenter_email", "'UNKNOWN'"], dbt.type_string() @@ -70,13 +92,11 @@ email_comment_details AS ( ) }} AS comment_body FROM engagement_emails - WHERE - LOWER( - engagement_emails.engagement_type - ) = 'email' ), note_comment_details AS ( SELECT + source_relation, + engagement_id, {{ unified_rag.coalesce_cast( ["engagement_notes.owner_email", "'UNKNOWN'"], dbt.type_string() @@ -96,13 
+116,10 @@ note_comment_details AS ( ) }} AS comment_body FROM engagement_notes - WHERE - LOWER( - engagement_deals.engagement_type - ) = 'note' -), +) SELECT source_relation, + engagement_id, comment_time, CAST( {{ dbt.concat([ "'Email subject: '", "email_title", "'\\n'", "'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'", "'##### sent @ '", "comment_time", "'\\n'", "comment_body" ]) }} AS {{ dbt.type_string() }} @@ -112,6 +129,7 @@ FROM UNION ALL SELECT source_relation, + engagement_id, comment_time, CAST( {{ dbt.concat([ "'Engagement type: Note'", "'\\n'", "'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'", "'##### sent @ '", "comment_time", "'\\n'", "comment_body" ]) }} AS {{ dbt.type_string() }} diff --git a/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql b/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql new file mode 100644 index 0000000..e0e7bd3 --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql @@ -0,0 +1,138 @@ +{{ config(enabled = var('rag__using_hubspot', True)) }} + +WITH tickets AS ( + + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket') }} +), +ticket_engagements AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket_engagement') }} +), +engagement_details AS ( + SELECT + * + FROM + {{ ref('int_rag_hubspot__engagement') }} +), +comments_tokens AS ( + SELECT + *, + {{ unified_rag.count_tokens("comment_markdown") }} AS comment_tokens + FROM + engagement_details +), +truncated_comments AS ( + SELECT + engagement_id, + source_relation, + comment_time, + CASE + WHEN comment_tokens > {{ var( + 'document_max_tokens', + 5000 + ) }} THEN LEFT( + comment_markdown, + {{ var( + 'document_max_tokens', + 5000 + ) }} * 4 + ) -- approximate 4 characters per token + ELSE comment_markdown + END AS comment_markdown, + CASE + WHEN comment_tokens > {{ var( + 'document_max_tokens', + 5000 + ) }} THEN {{ var( + 
'document_max_tokens', + 5000 + ) }} + ELSE comment_tokens + END AS comment_tokens + FROM + comments_tokens +), +comments_associated_with_ticket AS ( + SELECT + truncated_comments.engagement_id, + truncated_comments.comment_time, + truncated_comments.comment_markdown, + truncated_comments.comment_tokens, + truncated_comments.source_relation, + tickets.id AS ticket_id, + tickets.property_subject AS ticket_subject + FROM + truncated_comments + JOIN ticket_engagements + ON truncated_comments.engagement_id = ticket_engagements.engagement_id + AND truncated_comments.source_relation = ticket_engagements.source_relation + JOIN tickets + ON tickets.id = ticket_engagements.ticket_id + AND tickets.source_relation = ticket_engagements.source_relation + WHERE + truncated_comments.comment_markdown IS NOT NULL +), +grouped_comment_documents AS ( + SELECT + ticket_id, + ticket_subject, + source_relation, + comment_markdown, + comment_tokens, + comment_time, + SUM(comment_tokens) over ( + PARTITION BY ticket_id, source_relation /* same (ticket_id, source_relation) grain as most_recent_document */ + ORDER BY + comment_time rows BETWEEN unbounded preceding + AND CURRENT ROW + ) AS cumulative_length + FROM + comments_associated_with_ticket +), +most_recent_document AS ( + SELECT + ticket_id, + source_relation, + MAX(comment_time) AS most_recent_chunk_update + FROM + grouped_comment_documents + GROUP BY + 1, + 2 +) +SELECT + grouped_comment_documents.ticket_id, + grouped_comment_documents.ticket_subject, + grouped_comment_documents.source_relation, + CAST( + {{ dbt_utils.safe_divide( + 'floor(cumulative_length - 1)', + var( + 'document_max_tokens', + 5000 + ) + ) }} AS {{ dbt.type_int() }} + ) AS chunk_index, + most_recent_document.most_recent_chunk_update, + {{ dbt.listagg( + measure = "comment_markdown", + delimiter_text = "'\\n\\n---\\n\\n'", + order_by_clause = "order by comment_time" + ) }} AS comments_group_markdown, + SUM(comment_tokens) AS chunk_tokens +FROM + grouped_comment_documents + INNER JOIN most_recent_document + ON grouped_comment_documents.ticket_id =
most_recent_document.ticket_id + AND grouped_comment_documents.source_relation = most_recent_document.source_relation +GROUP BY + 1, + 2, + 3, + 4, + 5 diff --git a/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql b/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql new file mode 100644 index 0000000..7850986 --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql @@ -0,0 +1,60 @@ +{{ config(enabled = var('rag__using_hubspot', True)) }} + +WITH tickets AS ( + + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket') }} +), +ticket_companies AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket_company') }} +), +companies AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__company') }} +), +ticket_with_companies AS ( + SELECT + tickets.id, + tickets.source_relation, + companies.company_name + FROM + tickets + LEFT JOIN ticket_companies + ON tickets.id = ticket_companies.ticket_id + AND tickets.source_relation = ticket_companies.source_relation + LEFT JOIN companies + ON companies.company_id = ticket_companies.company_id + AND companies.source_relation = ticket_companies.source_relation +), +aggregated AS ( + SELECT + id, + source_relation, + {{ dbt.listagg( + 'company_name', + "', '" + ) }} AS company_names + FROM + ticket_with_companies + GROUP BY + id, + source_relation +) +SELECT + COALESCE( + aggregated.company_names, + '' + ) AS company_names, + tickets.* +FROM + tickets + LEFT JOIN aggregated + ON tickets.id = aggregated.id + AND tickets.source_relation = aggregated.source_relation diff --git a/models/staging/hubspot_staging/src_rag_hubspot.yml b/models/staging/hubspot_staging/src_rag_hubspot.yml index f498a12..93dc80e 100644 --- a/models/staging/hubspot_staging/src_rag_hubspot.yml +++ b/models/staging/hubspot_staging/src_rag_hubspot.yml @@ -59,12 +59,12 @@ sources: PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. 
For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: timestamp - description: > + description: > This field marks the note's time of occurrence and determines where the note sits on the record timeline. You can use either a Unix timestamp in milliseconds or UTC format. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: property_hubspot_owner_id - description: > + description: > The ID of the owner associated with the note. This field determines the user listed as the note creator on the record timeline. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. @@ -90,12 +90,12 @@ sources: PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: timestamp - description: > + description: > This field marks the email's time of occurrence and determines where the email sits on the record timeline. You can use either a Unix timestamp in milliseconds or UTC format. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: property_hubspot_owner_id - description: > + description: > The ID of the owner associated with the email. This field determines the user listed as the email creator on the record timeline. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. 
@@ -132,7 +132,7 @@ sources: description: The ID of the related engagement. - name: engagement_contact - identifier: "{{ var('rag_hubspot_engagement_contact_identifier', 'engagement_contact')}}" + identifier: "{{ var('rag_hubspot_engagement_contact_identifier', 'engagement_contact')}}" description: Each record represents a 'link' between a contact and an engagement. config: enabled: "{{ var('hubspot_sales_enabled', true) and var('hubspot_engagement_enabled', true) and var('hubspot_engagement_contact_enabled', true) }}" @@ -142,7 +142,7 @@ sources: - name: contact_id description: The ID of the related contact. - name: engagement_id - description: The ID of the related engagement. + description: The ID of the related engagement. - name: company identifier: "{{ var('rag_hubspot_company_identifier', 'company')}}" @@ -196,7 +196,7 @@ sources: - name: deal_pipeline_stage_id description: The ID of the deal's pipeline stage. - name: owner_id - description: The ID of the deal's owner. + description: The ID of the deal's owner. - name: property_dealname description: The name you have given this deal. - name: property_description @@ -211,8 +211,8 @@ sources: - name: contact identifier: "{{ var('rag_hubspot_contact_identifier', 'contact')}}" freshness: - warn_after: {count: 84, period: hour} - error_after: {count: 168, period: hour} + warn_after: { count: 84, period: hour } + error_after: { count: 168, period: hour } description: Each record represents a contact in Hubspot. config: enabled: "{{ var('rag_hubspot_marketing_enabled', true) and var('rag_hubspot_contact_enabled', true) }}" @@ -282,7 +282,7 @@ sources: description: The ID of the related contact. - name: company description: The ID of the related company. - + - name: team identifier: "{{ var('rag_hubspot_deal_company_identifier', 'team')}}" description: Each record represents a team. @@ -296,4 +296,19 @@ sources: - name: id description: The ID of the team. - name: name - description: The name of the team. 
\ No newline at end of file + description: The name of the team. + + - name: ticket + identifier: "{{ var('rag_hubspot_ticket_identifier', 'ticket')}}" + config: + enabled: True + + - name: ticket_company + identifier: "{{ var('rag_hubspot_ticket_company_identifier', 'ticket_company')}}" + config: + enabled: True + + - name: ticket_engagement + identifier: "{{ var('rag_hubspot_ticket_engagement_identifier', 'ticket_engagement')}}" + config: + enabled: True diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql new file mode 100644 index 0000000..1de87cd --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql @@ -0,0 +1,34 @@ +{{ config(enabled = var('rag__using_hubspot', True)) }} + +WITH base AS ( + {{ fivetran_utils.union_data( + table_identifier = 'ticket', + database_variable = 'rag_hubspot_database', + schema_variable = 'rag_hubspot_schema', + default_database = target.database, + default_schema = 'rag_hubspot', + default_variable = 'hubspot_ticket', + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} +), +fields AS ( + SELECT + * + {{ fivetran_utils.source_relation( + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} + FROM + base +), +FINAL AS ( + SELECT + * + FROM + fields +) +SELECT + * +FROM + FINAL diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql new file mode 100644 index 0000000..54696f5 --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql @@ -0,0 +1,37 @@ +{{ config(enabled = var('rag__using_hubspot', True)) }} + +{% set hubspot_ticket_company_columns = [ {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, 
{"name": "category", "datatype": dbt.type_string()}, {"name": "ticket_id", "datatype": dbt.type_int()}, {"name": "company_id", "datatype": dbt.type_int()} ] %} +WITH base AS ( + {{ fivetran_utils.union_data( + table_identifier = 'ticket_company', + database_variable = 'rag_hubspot_database', + schema_variable = 'rag_hubspot_schema', + default_database = target.database, + default_schema = 'rag_hubspot', + default_variable = 'hubspot_ticket_company', + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} +), +fields AS ( + SELECT + {{ fivetran_utils.fill_staging_columns( + source_columns = adapter.get_columns_in_relation(source('rag_hubspot', 'ticket_company')), + staging_columns = hubspot_ticket_company_columns + ) }} + {{ fivetran_utils.source_relation( + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} + FROM + base +) +SELECT + DISTINCT source_relation, + category, + ticket_id, + company_id, + _fivetran_deleted, + CAST(_fivetran_synced AS {{ dbt.type_timestamp() }}) AS _fivetran_synced +FROM + fields diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql new file mode 100644 index 0000000..2b126db --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql @@ -0,0 +1,38 @@ +{{ config(enabled = var('rag__using_hubspot', True)) }} + +{% set hubspot_ticket_engagement_columns = [ {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, {"name": "category", "datatype": dbt.type_string()}, {"name": "ticket_id", "datatype": dbt.type_int()}, {"name": "engagement_id", "datatype": dbt.type_int()}, {"name": "engagement_type", "datatype": dbt.type_string()} ] %} +WITH base AS ( + {{ fivetran_utils.union_data( + table_identifier = 
'ticket_engagement', + database_variable = 'rag_hubspot_database', + schema_variable = 'rag_hubspot_schema', + default_database = target.database, + default_schema = 'rag_hubspot', + default_variable = 'hubspot_ticket_engagement', + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} +), +fields AS ( + SELECT + {{ fivetran_utils.fill_staging_columns( + source_columns = adapter.get_columns_in_relation(source('rag_hubspot', 'ticket_engagement')), + staging_columns = hubspot_ticket_engagement_columns + ) }} + {{ fivetran_utils.source_relation( + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} + FROM + base +) +SELECT + source_relation, + category, + ticket_id, + engagement_id, + engagement_type, + _fivetran_deleted, + CAST(_fivetran_synced AS {{ dbt.type_timestamp() }}) AS _fivetran_synced +FROM + fields From 9d57d16e525d6561a7f579ce1b8a61785c25fd52 Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Thu, 24 Apr 2025 14:06:39 -0700 Subject: [PATCH 21/26] stuff --- .../hubspot/int_rag_hubspot__deal_comment_document.sql | 6 +++++- .../int_rag_hubspot__deal_comment_documents_grouped.sql | 6 +++++- .../intermediate/hubspot/int_rag_hubspot__deal_document.sql | 6 +++++- models/staging/hubspot_staging/stg_rag_hubspot__deal.sql | 6 +++++- .../hubspot_staging/stg_rag_hubspot__deal_company.sql | 6 +++++- models/unstructured/rag_hubspot__document.sql | 6 +++++- 6 files changed, 30 insertions(+), 6 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql index ef0bf0b..ff8758b 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + 
var('rag__using_hubspot', True) + and not var('should_exclude_deal', False) +) %} +{{ config(enabled=model_enabled) }} with engagement_deals as ( diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql index 1428bc4..3f8040d 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and not var('should_exclude_deal', False) +) %} +{{ config(enabled=model_enabled) }} with filtered_comment_documents as ( diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index dccbe08..c859c60 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and not var('should_exclude_deal', False) +) %} +{{ config(enabled=model_enabled) }} with deals as ( diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql index e8074f6..a7f126f 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and not var('should_exclude_deal', False) +) %} +{{ config(enabled=model_enabled) }} with base as ( diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql index 
4ec431e..33ed208 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and not var('should_exclude_deal', False) +) %} +{{ config(enabled=model_enabled) }} with base as ( diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index d246927..1a98a83 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and not var('should_exclude_deal', False) +) %} +{{ config(enabled=model_enabled) }} with deal_document as ( From 8e0f0c9050e3dd62579614039655ac66616cbc36 Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Thu, 24 Apr 2025 14:37:26 -0700 Subject: [PATCH 22/26] remove double negative --- .../hubspot/int_rag_hubspot__deal_comment_document.sql | 2 +- .../hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql | 2 +- models/intermediate/hubspot/int_rag_hubspot__deal_document.sql | 2 +- models/staging/hubspot_staging/stg_rag_hubspot__deal.sql | 2 +- .../staging/hubspot_staging/stg_rag_hubspot__deal_company.sql | 2 +- models/unstructured/rag_hubspot__document.sql | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql index ff8758b..96ac221 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql @@ -1,6 +1,6 @@ {% set model_enabled = ( var('rag__using_hubspot', True) - and not var('should_exclude_deal', False) + and var('should_include_deal', True) ) %} 
{{ config(enabled=model_enabled) }} diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql index 3f8040d..f8b9760 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql @@ -1,6 +1,6 @@ {% set model_enabled = ( var('rag__using_hubspot', True) - and not var('should_exclude_deal', False) + and var('should_include_deal', True) ) %} {{ config(enabled=model_enabled) }} diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index c859c60..c4eff01 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -1,6 +1,6 @@ {% set model_enabled = ( var('rag__using_hubspot', True) - and not var('should_exclude_deal', False) + and var('should_include_deal', True) ) %} {{ config(enabled=model_enabled) }} diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql index a7f126f..44f6505 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql @@ -1,6 +1,6 @@ {% set model_enabled = ( var('rag__using_hubspot', True) - and not var('should_exclude_deal', False) + and var('should_include_deal', True) ) %} {{ config(enabled=model_enabled) }} diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql index 33ed208..2e658e9 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql @@ -1,6 +1,6 @@ {% set model_enabled = ( var('rag__using_hubspot', True) - and not 
var('should_exclude_deal', False) + and var('should_include_deal', True) ) %} {{ config(enabled=model_enabled) }} diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index 1a98a83..7e14edc 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -1,6 +1,6 @@ {% set model_enabled = ( var('rag__using_hubspot', True) - and not var('should_exclude_deal', False) + and var('should_include_deal', True) ) %} {{ config(enabled=model_enabled) }} From 0a11784b2fbb139d0bf950dcc258d57ab45f4d3a Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Thu, 24 Apr 2025 14:39:47 -0700 Subject: [PATCH 23/26] make deal data option in company interm object. --- .../int_rag_hubspot__company_document.sql | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index 3bf68ea..a6f6448 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -18,6 +18,13 @@ WITH owners AS ( FROM {{ ref('stg_rag_hubspot__owner') }} ), +company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__company') }} +), +{%- if var('should_include_deal', True) %} deals AS ( SELECT *, @@ -25,12 +32,6 @@ deals AS ( FROM {{ ref('stg_rag_hubspot__deal') }} ), -company AS ( - SELECT - * - FROM - {{ ref('stg_rag_hubspot__company') }} -), deal_company AS ( SELECT * @@ -67,11 +68,16 @@ company_with_deal_description AS ( 1, 2 ) +{%- endif %} SELECT + {%- if var('should_include_deal', True) %} cdd.deal_descriptions AS deals, + {%- endif %} company.* FROM company + {%- if var('should_include_deal', True) %} JOIN company_with_deal_description cdd ON cdd.company_id = company.company_id AND cdd.source_relation = company.source_relation + {%- endif %} From 
4a6db311bc64946891a6cd9029a10b54c476c03e Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Thu, 24 Apr 2025 15:40:28 -0700 Subject: [PATCH 24/26] disable rag__unified_document --- models/rag__unified_document.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/models/rag__unified_document.sql b/models/rag__unified_document.sql index b919e3a..a649017 100644 --- a/models/rag__unified_document.sql +++ b/models/rag__unified_document.sql @@ -1,3 +1,5 @@ +{{ config(enabled=False) }} + {{ config( materialized='table' if unified_rag.is_databricks_sql_warehouse() else 'incremental', From 1a543f484a45d8a7d45c2efd716064a2a9f13c30 Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Thu, 24 Apr 2025 15:43:05 -0700 Subject: [PATCH 25/26] nit fix --- .../hubspot/int_rag_hubspot__company_document.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql index a6f6448..22e37cd 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -23,9 +23,9 @@ company AS ( * FROM {{ ref('stg_rag_hubspot__company') }} -), +) {%- if var('should_include_deal', True) %} -deals AS ( +,deals AS ( SELECT *, COALESCE({{ cast('closed_date', dbt.type_string()) }}, 'not closed yet') AS safe_close_date From 2ed95412419fd7ff345f8f492081cdc1439cc710 Mon Sep 17 00:00:00 2001 From: abhijeethp Date: Mon, 30 Jun 2025 21:57:33 -0700 Subject: [PATCH 26/26] make ticket objects optional --- .../hubspot/int_rag_hubspot__ticket_comment_document.sql | 6 +++++- .../hubspot/int_rag_hubspot__ticket_document.sql | 6 +++++- models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql | 6 +++++- .../hubspot_staging/stg_rag_hubspot__ticket_company.sql | 6 +++++- .../hubspot_staging/stg_rag_hubspot__ticket_engagement.sql | 6 +++++- 5 files changed, 25 insertions(+), 5 deletions(-) diff 
--git a/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql b/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql index e0e7bd3..b2418b8 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql @@ -1,4 +1,8 @@ -{{ config(enabled = var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} WITH tickets AS ( diff --git a/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql b/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql index 7850986..ce0cad9 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql @@ -1,4 +1,8 @@ -{{ config(enabled = var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} WITH tickets AS ( diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql index 1de87cd..18570b5 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql @@ -1,4 +1,8 @@ -{{ config(enabled = var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} WITH base AS ( {{ fivetran_utils.union_data( diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql index 54696f5..95b44e9 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql +++ 
b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql @@ -1,4 +1,8 @@ -{{ config(enabled = var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} {% set hubspot_ticket_company_columns = [ {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, {"name": "category", "datatype": dbt.type_string()}, {"name": "ticket_id", "datatype": dbt.type_int()}, {"name": "company_id", "datatype": dbt.type_int()} ] %} WITH base AS ( diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql index 2b126db..7d2ecac 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql @@ -1,4 +1,8 @@ -{{ config(enabled = var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} {% set hubspot_ticket_engagement_columns = [ {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, {"name": "category", "datatype": dbt.type_string()}, {"name": "ticket_id", "datatype": dbt.type_int()}, {"name": "engagement_id", "datatype": dbt.type_int()}, {"name": "engagement_type", "datatype": dbt.type_string()} ] %} WITH base AS (