diff --git a/dbt_project.yml b/dbt_project.yml index 9d16e0f..d7d58f7 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -29,6 +29,10 @@ vars: hubspot_contact: "{{ source('rag_hubspot', 'contact') }}" hubspot_deal: "{{ source('rag_hubspot', 'deal') }}" hubspot_owner: "{{ source('rag_hubspot', 'owner') }}" + hubspot_ticket: "{{ source('rag_hubspot', 'ticket') }}" + hubspot_ticket_company: "{{ source('rag_hubspot', 'ticket_company') }}" + hubspot_ticket_engagement: "{{ source('rag_hubspot', 'ticket_engagement') }}" + models: diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index fef72d6..5ffc2cd 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -21,6 +21,7 @@ vars: rag_hubspot_engagement_company_identifier: "hubspot_engagement_company" rag_hubspot_engagement_contact_identifier: "hubspot_engagement_contact" rag_hubspot_engagement_deal_identifier: "hubspot_engagement_deal" + rag_hubspot_engagement_deal_company: "hubspot_deal_company" rag_hubspot_company_identifier: "hubspot_company" rag_hubspot_contact_identifier: "hubspot_contact" rag_hubspot_owner_identifier: "hubspot_owner" diff --git a/macros/staging/hubspot/get_hubspot_deal_company_columns.sql b/macros/staging/hubspot/get_hubspot_deal_company_columns.sql new file mode 100644 index 0000000..aa8199c --- /dev/null +++ b/macros/staging/hubspot/get_hubspot_deal_company_columns.sql @@ -0,0 +1,11 @@ +{% macro get_hubspot_deal_company_columns() %} + +{% set columns = [ + {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, + {"name": "deal_id", "datatype": dbt.type_int()}, + {"name": "company_id", "datatype": dbt.type_int()} +] %} + +{{ return(columns) }} + +{% endmacro %} \ No newline at end of file diff --git a/macros/utility/create_json.sql b/macros/utility/create_json.sql new file mode 100644 index 0000000..b76218c --- /dev/null +++ b/macros/utility/create_json.sql @@ -0,0 +1,38 @@ +{% macro create_json(columns) -%} + {% if target.type == 'bigquery' -%} + TO_JSON_STRING( + STRUCT( + {%- for column in columns -%} + {{ column }} AS {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + ) + {% elif target.type == 'snowflake' -%} + CAST( + OBJECT_CONSTRUCT( + {%- for column in columns -%} + '{{ column }}', {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + AS STRING + ) + {% elif target.type == 'redshift' -%} + json_build_object( + {%- for column in columns -%} + '{{ column }}', {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + )::VARCHAR + {% elif target.type == 'databricks' -%} + to_json( + named_struct( + {%- for column in columns -%} + '{{ column }}', {{ column }} + {%- if not loop.last -%}, {% endif -%} + {%- endfor -%} + ) + ) + {% endif -%} +{% endmacro -%} diff --git a/models/intermediate/hubspot/int_rag_hubspot__company_document.sql b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql new file mode 100644 index 0000000..22e37cd --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__company_document.sql @@ -0,0 +1,83 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +WITH owners AS ( + SELECT + *, + COALESCE( + owner_email, + 'UNKNOWN' + ) AS safe_email, + COALESCE( + first_name, + '' + ) AS safe_first_name, + COALESCE( + last_name, + '' + ) AS safe_last_name + FROM + {{ ref('stg_rag_hubspot__owner') }} +), +company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__company') }} +) +{%- if var('should_include_deal', True) %} +,deals AS ( + SELECT + *, + COALESCE({{ cast('closed_date', dbt.type_string()) }}, 'not closed yet') AS safe_close_date + FROM + {{ ref('stg_rag_hubspot__deal') }} +), +deal_company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__deal_company') }} +), +deal_descriptions AS ( + SELECT + DISTINCT deal_id, + source_relation, + safe_close_date AS closed_date, + {{ create_json(['deal_id', 'title', 'safe_close_date']) }} AS deal_description + FROM + deals +), +company_with_deal_description AS ( + SELECT + company.company_id AS company_id, + company.source_relation AS source_relation, + {{ dbt.concat([ + "'['", + dbt.listagg("dd.deal_description", "','", "order by dd.closed_date"), + "']'" + ]) }} AS deal_descriptions + FROM + company + LEFT JOIN deal_company dc + ON dc.company_id = company.company_id + AND dc.source_relation = company.source_relation + LEFT JOIN deal_descriptions dd + ON dd.deal_id = dc.deal_id + AND dc.source_relation = dd.source_relation + GROUP BY + 1, + 2 +) +{%- endif %} +SELECT + {%- if var('should_include_deal', True) %} + cdd.deal_descriptions AS deals, + {%- endif %} + company.* +FROM + company + {%- if var('should_include_deal', True) %} + JOIN company_with_deal_description cdd + ON cdd.company_id = company.company_id + AND cdd.source_relation = company.source_relation + {%- endif %} diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql index ef0bf0b..96ac221 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_document.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_deal', True) +) %} +{{ config(enabled=model_enabled) }} with engagement_deals as ( diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql index 1428bc4..f8b9760 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_comment_documents_grouped.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_deal', True) +) %} +{{ config(enabled=model_enabled) }} with filtered_comment_documents as ( diff --git a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql index e9927aa..c4eff01 100644 --- a/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql +++ b/models/intermediate/hubspot/int_rag_hubspot__deal_document.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_deal', True) +) %} +{{ config(enabled=model_enabled) }} with deals as ( @@ -13,11 +17,19 @@ contacts as ( ), companies as ( - - select * + select + *, + {{ create_json(['company_id', 'company_name']) }} AS company_desc from {{ ref('stg_rag_hubspot__company') }} ), +deal_company AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__deal_company') }} +), + engagements as ( select * from {{ ref('stg_rag_hubspot__engagement') }} @@ -41,6 +53,11 @@ engagement_deals as ( from {{ ref('stg_rag_hubspot__engagement_deal') }} ), +owners AS ( + select * + from {{ ref('stg_rag_hubspot__owner') }} +), + engagement_detail_prep as ( select @@ -52,7 +69,8 @@ engagement_detail_prep as ( {{ unified_rag.coalesce_cast(["contacts.contact_name", "'UNKNOWN'"], dbt.type_string()) }} as contact_name, {{ unified_rag.coalesce_cast(["contacts.email", "'UNKNOWN'"], dbt.type_string()) }} as created_by, {{ unified_rag.coalesce_cast(["companies.company_name", "'UNKNOWN'"], dbt.type_string()) }} as company_name, - {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on + {{ unified_rag.coalesce_cast(["deals.created_date", "'1970-01-01 00:00:00'"], dbt.type_timestamp()) }} AS created_on, + {{ dbt.concat(["coalesce(owners.first_name, '')", "' '", "coalesce(owners.last_name, '')", "' ('", "coalesce(owners.owner_email, '')", "')'"]) }} AS owner_details from deals left join engagement_deals on deals.deal_id = engagement_deals.deal_id @@ -72,6 +90,9 @@ engagement_detail_prep as ( left join companies on engagement_companies.company_id = companies.company_id and engagement_companies.source_relation = companies.source_relation + left join owners + on deals.owner_id = owners.owner_id + and deals.source_relation = owners.source_relation ), engagement_details as ( @@ -84,7 +105,8 @@ engagement_details as ( {{ fivetran_utils.string_agg(field_to_agg="distinct engagement_type", delimiter="', '") }} as engagement_type, {{ fivetran_utils.string_agg(field_to_agg="distinct contact_name", delimiter="', '") }} as contact_name, {{ fivetran_utils.string_agg(field_to_agg="distinct created_by", delimiter="', '") }} as created_by, - {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name + {{ fivetran_utils.string_agg(field_to_agg="distinct company_name", delimiter="', '") }} as company_name, + {{ fivetran_utils.string_agg(field_to_agg="distinct owner_details", delimiter="', '") }} as owner_details from engagement_detail_prep group by 1,2,3,4,5 ), @@ -92,18 +114,31 @@ engagement_details as ( engagement_markdown as ( select - deal_id, - title, - source_relation, - url_reference, + ed.deal_id, + ed.title, + ed.source_relation, + ed.url_reference, cast( {{ dbt.concat([ "'Deal Name : '", "title", "'\\n\\n'", "'Created By : '", "contact_name", "' ('", "created_by", "')\\n'", "'Created On : '", "created_on", "'\\n'", - "'Company Name: '", "company_name", "'\\n'", - "'Engagement Type: '", "engagement_type", "'\\n'" - ]) }} as {{ dbt.type_string() }}) as comment_markdown - from engagement_details + "'Company Name: '", "ed.company_name", "'\\n'", + "'Engagement Type: '", "engagement_type", "'\\n'", + "'Deal Owner: '", "owner_details", "'\\n'" + ]) }} as {{ dbt.type_string() }}) as comment_markdown, + {{ dbt.concat([ + "'['", + dbt.listagg("cc.company_desc", "','"), + "']'" + ]) }} AS companies + from engagement_details ed + left join deal_company dc + on dc.deal_id = ed.deal_id + and dc.source_relation = ed.source_relation + left join companies cc + on dc.company_id = cc.company_id + and dc.source_relation = cc.source_relation + group by 1,2,3,4,5 ), engagement_tokens as ( diff --git a/models/intermediate/hubspot/int_rag_hubspot__engagement.sql b/models/intermediate/hubspot/int_rag_hubspot__engagement.sql new file mode 100644 index 0000000..1038860 --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__engagement.sql @@ -0,0 +1,138 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +WITH contacts AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__contact') }} +), +engagement_contacts AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__engagement_contact') }} +), +owners AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__owner') }} +), +engagement_emails AS ( + SELECT + engagement_email.engagement_id, + engagement_email.source_relation, + engagement_email.engagement_type, + engagement_email.created_timestamp, + engagement_email.occurred_timestamp, + engagement_email.owner_id, + engagement_email.team_id, + engagement_email.body, + engagement_email.title, + engagement_email.email_to_email, + engagement_email.email_cc_email, + engagement_email.email_from_email AS commenter_email, + {{ fivetran_utils.string_agg( + field_to_agg = "contacts.contact_name", + delimiter = "','" + ) }} AS commenter_name + FROM + {{ ref('stg_rag_hubspot__engagement_email') }} + engagement_email + LEFT JOIN engagement_contacts + ON engagement_email.engagement_id = engagement_contacts.engagement_id + AND engagement_email.source_relation = engagement_contacts.source_relation + LEFT JOIN contacts + ON engagement_contacts.contact_id = contacts.contact_id + AND engagement_contacts.source_relation = contacts.source_relation {{ dbt_utils.group_by(12) }} +), +engagement_notes AS ( + SELECT + engagement_note.engagement_id, + engagement_note.source_relation, + engagement_note.engagement_type, + engagement_note.created_timestamp, + engagement_note.occurred_timestamp, + engagement_note.owner_id, + engagement_note.team_id, + engagement_note.title, + engagement_note.body, + owners.owner_name, + owners.owner_email + FROM + {{ ref('stg_rag_hubspot__engagement_note') }} + engagement_note + LEFT JOIN owners + ON engagement_note.owner_id = owners.owner_id + AND engagement_note.source_relation = owners.source_relation +), +email_comment_details AS ( + SELECT + source_relation, + engagement_id, + {{ unified_rag.coalesce_cast( + ["engagement_emails.commenter_email", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_email, + {{ unified_rag.coalesce_cast( + ["engagement_emails.commenter_name", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_name, + {{ unified_rag.coalesce_cast( + ["engagement_emails.title", "'UNKNOWN'"], + dbt.type_string() + ) }} AS email_title, + {{ unified_rag.coalesce_cast( + ["engagement_emails.created_timestamp", "'1970-01-01 00:00:00'"], + dbt.type_timestamp() + ) }} AS comment_time, + {{ unified_rag.coalesce_cast( + ["engagement_emails.body", "'UNKNOWN'"], + dbt.type_string() + ) }} AS comment_body + FROM + engagement_emails +), +note_comment_details AS ( + SELECT + source_relation, + engagement_id, + {{ unified_rag.coalesce_cast( + ["engagement_notes.owner_email", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_email, + {{ unified_rag.coalesce_cast( + ["engagement_notes.owner_name", "'UNKNOWN'"], + dbt.type_string() + ) }} AS commenter_name, + engagement_notes.title AS engagement_note_title, + {{ unified_rag.coalesce_cast( + ["engagement_notes.created_timestamp", "'1970-01-01 00:00:00'"], + dbt.type_timestamp() + ) }} AS comment_time, + {{ unified_rag.coalesce_cast( + ["engagement_notes.body", "'UNKNOWN'"], + dbt.type_string() + ) }} AS comment_body + FROM + engagement_notes +) +SELECT + source_relation, + engagement_id, + comment_time, + CAST( + {{ dbt.concat([ "'Email subject: '", "email_title", "'\\n'", "'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'", "'##### sent @ '", "comment_time", "'\\n'", "comment_body" ]) }} AS {{ dbt.type_string() }} + ) AS comment_markdown +FROM + email_comment_details +UNION ALL +SELECT + source_relation, + engagement_id, + comment_time, + CAST( + {{ dbt.concat([ "'Engagement type: Note'", "'\\n'", "'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'", "'##### sent @ '", "comment_time", "'\\n'", "comment_body" ]) }} AS {{ dbt.type_string() }} + ) AS comment_markdown +FROM + note_comment_details diff --git a/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql b/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql new file mode 100644 index 0000000..b2418b8 --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__ticket_comment_document.sql @@ -0,0 +1,142 @@ +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} + +WITH tickets AS ( + + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket') }} +), +ticket_engagements AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket_engagement') }} +), +engagement_details AS ( + SELECT + * + FROM + {{ ref('int_rag_hubspot__engagement') }} +), +comments_tokens AS ( + SELECT + *, + {{ unified_rag.count_tokens("comment_markdown") }} AS comment_tokens + FROM + engagement_details +), +truncated_comments AS ( + SELECT + engagement_id, + source_relation, + comment_time, + CASE + WHEN comment_tokens > {{ var( + 'document_max_tokens', + 5000 + ) }} THEN LEFT( + comment_markdown, + {{ var( + 'document_max_tokens', + 5000 + ) }} * 4 + ) -- approximate 4 characters per token + ELSE comment_markdown + END AS comment_markdown, + CASE + WHEN comment_tokens > {{ var( + 'document_max_tokens', + 5000 + ) }} THEN {{ var( + 'document_max_tokens', + 5000 + ) }} + ELSE comment_tokens + END AS comment_tokens + FROM + comments_tokens +), +comments_associated_with_ticket AS ( + SELECT + truncated_comments.engagement_id, + truncated_comments.comment_time, + truncated_comments.comment_markdown, + truncated_comments.comment_tokens, + truncated_comments.source_relation, + tickets.id AS ticket_id, + tickets.property_subject AS ticket_subject + FROM + truncated_comments + JOIN ticket_engagements + ON truncated_comments.engagement_id = ticket_engagements.engagement_id + AND truncated_comments.source_relation = ticket_engagements.source_relation + JOIN tickets + ON tickets.id = ticket_engagements.ticket_id + AND tickets.source_relation = ticket_engagements.source_relation + WHERE + truncated_comments.comment_markdown IS NOT NULL +), +grouped_comment_documents AS ( + SELECT + ticket_id, + ticket_subject, + source_relation, + comment_markdown, + comment_tokens, + comment_time, + SUM(comment_tokens) over ( + PARTITION BY ticket_id + ORDER BY + comment_time rows BETWEEN unbounded preceding + AND CURRENT ROW + ) AS cumulative_length + FROM + comments_associated_with_ticket +), +most_recent_document AS ( + SELECT + ticket_id, + source_relation, + MAX(comment_time) AS most_recent_chunk_update + FROM + grouped_comment_documents + GROUP BY + 1, + 2 +) +SELECT + grouped_comment_documents.ticket_id, + grouped_comment_documents.ticket_subject, + grouped_comment_documents.source_relation, + CAST( + {{ dbt_utils.safe_divide( + 'floor(cumulative_length - 1)', + var( + 'document_max_tokens', + 5000 + ) + ) }} AS {{ dbt.type_int() }} + ) AS chunk_index, + most_recent_document.most_recent_chunk_update, + {{ dbt.listagg( + measure = "comment_markdown", + delimiter_text = "'\\n\\n---\\n\\n'", + order_by_clause = "order by comment_time" + ) }} AS comments_group_markdown, + SUM(comment_tokens) AS chunk_tokens +FROM + grouped_comment_documents + INNER JOIN most_recent_document + ON grouped_comment_documents.ticket_id = most_recent_document.ticket_id + AND grouped_comment_documents.source_relation = most_recent_document.source_relation +GROUP BY + 1, + 2, + 3, + 4, + 5 diff --git a/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql b/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql new file mode 100644 index 0000000..ce0cad9 --- /dev/null +++ b/models/intermediate/hubspot/int_rag_hubspot__ticket_document.sql @@ -0,0 +1,64 @@ +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} + +WITH tickets AS ( + + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket') }} +), +ticket_companies AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__ticket_company') }} +), +companies AS ( + SELECT + * + FROM + {{ ref('stg_rag_hubspot__company') }} +), +ticket_with_companies AS ( + SELECT + tickets.id, + tickets.source_relation, + companies.company_name + FROM + tickets + LEFT JOIN ticket_companies + ON tickets.id = ticket_companies.ticket_id + AND tickets.source_relation = ticket_companies.source_relation + LEFT JOIN companies + ON companies.company_id = ticket_companies.company_id + AND companies.source_relation = ticket_companies.source_relation +), +aggregated AS ( + SELECT + id, + source_relation, + {{ dbt.listagg( + 'company_name', + "', '" + ) }} AS company_names + FROM + ticket_with_companies + GROUP BY + id, + source_relation +) +SELECT + COALESCE( + aggregated.company_names, + '' + ) AS company_names, + tickets.* +FROM + tickets + LEFT JOIN aggregated + ON tickets.id = aggregated.id + AND tickets.source_relation = aggregated.source_relation diff --git a/models/intermediate/jira/int_rag_jira__issue_document.sql b/models/intermediate/jira/int_rag_jira__issue_document.sql index a5c8e83..5373adc 100644 --- a/models/intermediate/jira/int_rag_jira__issue_document.sql +++ b/models/intermediate/jira/int_rag_jira__issue_document.sql @@ -34,6 +34,7 @@ issue_details as ( select issues.issue_id, + issues.issue_key, {{ unified_rag.coalesce_cast(["issues.title", "'UNKNOWN'"], dbt.type_string()) }} as title, {% if var('jira_subdomain', default=None) %} {{ dbt.concat(["'https://'", "jira_subdomain_value", "'.atlassian.net/browse/'", "issues.issue_key"]) }} as url_reference, @@ -70,7 +71,7 @@ final as ( source_relation, url_reference, {{ dbt.concat([ - "'# issue : '", "title", "'\\n\\n'", + "'# issue ('", "issue_key", "') : '", "title", "'\\n\\n'", "'Created By : '", "user_name", "' ('", "created_by", "')\\n'", "'Created On : '", "created_on", "'\\n'", "'Status : '", "status", "'\\n'", diff --git a/models/rag__unified_document.sql b/models/rag__unified_document.sql index b919e3a..a649017 100644 --- a/models/rag__unified_document.sql +++ b/models/rag__unified_document.sql @@ -1,3 +1,5 @@ +{{ config(enabled=False) }} + {{ config( materialized='table' if unified_rag.is_databricks_sql_warehouse() else 'incremental', diff --git a/models/staging/hubspot_staging/src_rag_hubspot.yml b/models/staging/hubspot_staging/src_rag_hubspot.yml index 72a42bf..93dc80e 100644 --- a/models/staging/hubspot_staging/src_rag_hubspot.yml +++ b/models/staging/hubspot_staging/src_rag_hubspot.yml @@ -59,12 +59,12 @@ sources: PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: timestamp - description: > + description: > This field marks the note's time of occurrence and determines where the note sits on the record timeline. You can use either a Unix timestamp in milliseconds or UTC format. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: property_hubspot_owner_id - description: > + description: > The ID of the owner associated with the note. This field determines the user listed as the note creator on the record timeline. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. @@ -90,12 +90,12 @@ sources: PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: timestamp - description: > + description: > This field marks the email's time of occurrence and determines where the email sits on the record timeline. You can use either a Unix timestamp in milliseconds or UTC format. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. - name: property_hubspot_owner_id - description: > + description: > The ID of the owner associated with the email. This field determines the user listed as the email creator on the record timeline. PLEASE NOTE: This field will only be populated for connectors utilizing the HubSpot v3 API version. For the pre HubSpot v3 versions, this value may be found within the parent `engagement` table. @@ -132,7 +132,7 @@ sources: description: The ID of the related engagement. - name: engagement_contact - identifier: "{{ var('rag_hubspot_engagement_contact_identifier', 'engagement_contact')}}" + identifier: "{{ var('rag_hubspot_engagement_contact_identifier', 'engagement_contact')}}" description: Each record represents a 'link' between a contact and an engagement. config: enabled: "{{ var('hubspot_sales_enabled', true) and var('hubspot_engagement_enabled', true) and var('hubspot_engagement_contact_enabled', true) }}" @@ -142,7 +142,7 @@ sources: - name: contact_id description: The ID of the related contact. - name: engagement_id - description: The ID of the related engagement. + description: The ID of the related engagement. - name: company identifier: "{{ var('rag_hubspot_company_identifier', 'company')}}" @@ -196,7 +196,7 @@ sources: - name: deal_pipeline_stage_id description: The ID of the deal's pipeline stage. - name: owner_id - description: The ID of the deal's owner. + description: The ID of the deal's owner. - name: property_dealname description: The name you have given this deal. - name: property_description @@ -211,8 +211,8 @@ sources: - name: contact identifier: "{{ var('rag_hubspot_contact_identifier', 'contact')}}" freshness: - warn_after: {count: 84, period: hour} - error_after: {count: 168, period: hour} + warn_after: { count: 84, period: hour } + error_after: { count: 168, period: hour } description: Each record represents a contact in Hubspot. config: enabled: "{{ var('rag_hubspot_marketing_enabled', true) and var('rag_hubspot_contact_enabled', true) }}" @@ -269,3 +269,46 @@ sources: description: The type of owner. - name: updated_at description: Timestamp representing when the owner was last updated. + + - name: deal_company + identifier: "{{ var('rag_hubspot_deal_company_identifier', 'deal_company')}}" + description: Each record represents a 'link' between a deal and a company. + config: + enabled: "{{ var('rag_hubspot_sales_enabled', true) and var('rag_hubspot_company_enabled', true) and var('rag_hubspot_deal_enabled', true) }}" + columns: + - name: _fivetran_synced + description: '{{ doc("_fivetran_synced") }}' + - name: deal_id + description: The ID of the related contact. + - name: company + description: The ID of the related company. + + - name: team + identifier: "{{ var('rag_hubspot_deal_company_identifier', 'team')}}" + description: Each record represents a team. + config: + enabled: "{{ var('rag_hubspot_team_enabled', true) }}" + columns: + - name: _fivetran_synced + description: '{{ doc("_fivetran_synced") }}' + - name: _fivetran_deleted + description: '{{ doc("_fivetran_deleted") }}' + - name: id + description: The ID of the team. + - name: name + description: The name of the team. + + - name: ticket + identifier: "{{ var('rag_hubspot_ticket_identifier', 'ticket')}}" + config: + enabled: True + + - name: ticket_company + identifier: "{{ var('rag_hubspot_ticket_company_identifier', 'ticket_company')}}" + config: + enabled: True + + - name: ticket_engagement + identifier: "{{ var('rag_hubspot_ticket_engagement_identifier', 'ticket_engagement')}}" + config: + enabled: True diff --git a/models/staging/hubspot_staging/stg_rag_hubspot.yml b/models/staging/hubspot_staging/stg_rag_hubspot.yml index ec0d765..ac1eee2 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot.yml +++ b/models/staging/hubspot_staging/stg_rag_hubspot.yml @@ -279,3 +279,16 @@ models: - name: owner_email description: The email address of the owner. + - name: stg_rag_hubspot__team + description: Each record represents a team in Hubspot. + columns: + - name: _fivetran_synced + description: '{{ doc("_fivetran_synced") }}' + - name: _fivetran_deleted + description: Boolean to mark rows that were deleted in the source database. + - name: id + description: The id of the team. + - name: name + description: Name of the team. + - name: source_relation + description: The source of the record if the unioning functionality is being used. If it is not this field will be empty. \ No newline at end of file diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__company.sql index a1b3970..ca37f04 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__company.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__company.sql @@ -1,59 +1,29 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} - -with base as ( - - {{ - fivetran_utils.union_data( - table_identifier='company', - database_variable='rag_hubspot_database', - schema_variable='rag_hubspot_schema', - default_database=target.database, - default_schema='rag_hubspot', - default_variable='hubspot_company', - union_schema_variable='rag_hubspot_union_schemas', - union_database_variable='rag_hubspot_union_databases' - ) - }} -), - -fields as ( - - select - {{ - fivetran_utils.fill_staging_columns( - source_columns=adapter.get_columns_in_relation(source('rag_hubspot','company')), - staging_columns=get_hubspot_company_columns() - ) - }} - - {{ fivetran_utils.source_relation( - union_schema_variable='rag_hubspot_union_schemas', - union_database_variable='rag_hubspot_union_databases') - }} - from base -), - -final as ( - - select - company_id, - source_relation, - is_company_deleted, - cast(_fivetran_synced as {{ dbt.type_timestamp() }}) as _fivetran_synced, - company_name, - description, - created_date, - industry, - street_address, - street_address_2, - city, - state, - country, - company_annual_revenue - - from fields - -) - -select * -from final \ No newline at end of file +{{ config(enabled = var('rag__using_hubspot', True)) }} + +WITH FINAL AS ( + + SELECT + {{ dbt_utils.star( + from = ref('stg_rag_hubspot__company_fields'), + except = ['id', '_fivetran_synced', 'is_deleted', 'property_name', 'property_description', 'property_createdate', 'property_industry', 'property_address', 'property_address_2', 'property_city', 'property_state', 'property_country', 'property_annualrevenue' ] + ) }}, + id AS company_id, + CAST(_fivetran_synced AS {{ dbt.type_timestamp() }}) AS _fivetran_synced, + is_deleted AS is_company_deleted, + property_name AS company_name, + property_description AS description, + property_createdate AS created_date, + property_industry AS industry, + property_address AS street_address, + property_address_2 AS street_address_2, + property_city AS city, + property_state AS state, + property_country AS country, + property_annualrevenue AS company_annual_revenue + FROM + {{ ref('stg_rag_hubspot__company_fields') }} +) +SELECT + * +FROM + FINAL diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql b/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql new file mode 100644 index 0000000..a2632cc --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__company_fields.sql @@ -0,0 +1,30 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='company', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_company', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + * + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +) + +select * from fields \ No newline at end of file diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql index e8074f6..44f6505 100644 --- a/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_deal', True) +) %} +{{ config(enabled=model_enabled) }} with base as ( diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql new file mode 100644 index 0000000..2e658e9 --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__deal_company.sql @@ -0,0 +1,50 @@ +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_deal', True) +) %} +{{ config(enabled=model_enabled) }} + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='deal_company', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_deal_company', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + {{ + fivetran_utils.fill_staging_columns( + source_columns=adapter.get_columns_in_relation(source('rag_hubspot','deal_company')), + staging_columns=get_hubspot_deal_company_columns() + ) + }} + + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +), + +final as ( + + select + deal_id, + company_id, + source_relation + from fields +) + +select * +from final diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__team.sql b/models/staging/hubspot_staging/stg_rag_hubspot__team.sql new file mode 100644 index 0000000..499bc5a --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__team.sql @@ -0,0 +1,55 @@ +{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set hubspot_team_columns = [ + {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, + {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, + {"name": "id", "datatype": dbt.type_int()}, + {"name": "name", "datatype": dbt.type_string()} +] %} + + +with base as ( + + {{ + fivetran_utils.union_data( + table_identifier='team', + database_variable='rag_hubspot_database', + schema_variable='rag_hubspot_schema', + default_database=target.database, + default_schema='rag_hubspot', + default_variable='hubspot_team', + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases' + ) + }} +), + +fields as ( + + select + {{ + fivetran_utils.fill_staging_columns( + source_columns=adapter.get_columns_in_relation(source('rag_hubspot','team')), + staging_columns= hubspot_team_columns + ) + }} + + {{ fivetran_utils.source_relation( + union_schema_variable='rag_hubspot_union_schemas', + union_database_variable='rag_hubspot_union_databases') + }} + from base +), + +final as ( + + select + id, + name, + source_relation, + _fivetran_deleted, + cast(_fivetran_synced as {{ dbt.type_timestamp() }}) as _fivetran_synced, + from fields +) + +select * +from final \ No newline at end of file diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql new file mode 100644 index 0000000..18570b5 --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket.sql @@ -0,0 +1,38 @@ +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} + +WITH base AS ( + {{ fivetran_utils.union_data( + table_identifier = 'ticket', + database_variable = 'rag_hubspot_database', + schema_variable = 'rag_hubspot_schema', + default_database = target.database, + default_schema = 'rag_hubspot', + default_variable = 'hubspot_ticket', + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} +), +fields AS ( + SELECT + * + {{ fivetran_utils.source_relation( + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} + FROM + base +), +FINAL AS ( + SELECT + * + FROM + fields +) +SELECT + * +FROM + FINAL diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql new file mode 100644 index 0000000..95b44e9 --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_company.sql @@ -0,0 +1,41 @@ +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} + +{% set hubspot_ticket_company_columns = [ {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, {"name": "category", "datatype": dbt.type_string()}, {"name": "ticket_id", "datatype": dbt.type_int()}, {"name": "company_id", "datatype": dbt.type_int()} ] %} +WITH base AS ( + {{ fivetran_utils.union_data( + table_identifier = 'ticket_company', + database_variable = 'rag_hubspot_database', + schema_variable = 'rag_hubspot_schema', + default_database = target.database, + default_schema = 'rag_hubspot', + default_variable = 'hubspot_ticket_company', + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} +), +fields AS ( + SELECT + {{ fivetran_utils.fill_staging_columns( + source_columns = adapter.get_columns_in_relation(source('rag_hubspot', 'ticket_company')), + staging_columns = hubspot_ticket_company_columns + ) }} + {{ fivetran_utils.source_relation( + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} + FROM + base +) +SELECT + DISTINCT source_relation, + category, + ticket_id, + company_id, + _fivetran_deleted, + CAST(_fivetran_synced AS {{ dbt.type_timestamp() }}) AS _fivetran_synced +FROM + fields diff --git a/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql new file mode 100644 index 0000000..7d2ecac --- /dev/null +++ b/models/staging/hubspot_staging/stg_rag_hubspot__ticket_engagement.sql @@ -0,0 +1,42 @@ +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_ticket', True) +) %} +{{ config(enabled=model_enabled) }} + +{% set hubspot_ticket_engagement_columns = [ {"name": "_fivetran_synced", "datatype": dbt.type_timestamp()}, {"name": "_fivetran_deleted", "datatype": dbt.type_boolean()}, {"name": "category", "datatype": dbt.type_string()}, {"name": "ticket_id", "datatype": dbt.type_int()}, {"name": "engagement_id", "datatype": dbt.type_int()}, {"name": "engagement_type", "datatype": dbt.type_string()} ] %} +WITH base AS ( + {{ fivetran_utils.union_data( + table_identifier = 'ticket_engagement', + database_variable = 'rag_hubspot_database', + schema_variable = 'rag_hubspot_schema', + default_database = target.database, + default_schema = 'rag_hubspot', + default_variable = 'hubspot_ticket_engagement', + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} +), +fields AS ( + SELECT + {{ fivetran_utils.fill_staging_columns( + source_columns = adapter.get_columns_in_relation(source('rag_hubspot', 'ticket_engagement')), + staging_columns = hubspot_ticket_engagement_columns + ) }} + {{ fivetran_utils.source_relation( + union_schema_variable = 'rag_hubspot_union_schemas', + union_database_variable = 'rag_hubspot_union_databases' + ) }} + FROM + base +) +SELECT + source_relation, + category, + ticket_id, + engagement_id, + engagement_type, + _fivetran_deleted, + CAST(_fivetran_synced AS {{ dbt.type_timestamp() }}) AS _fivetran_synced +FROM + fields diff --git a/models/unstructured/rag_hubspot__document.sql b/models/unstructured/rag_hubspot__document.sql index ece1210..7e14edc 100644 --- a/models/unstructured/rag_hubspot__document.sql +++ b/models/unstructured/rag_hubspot__document.sql @@ -1,4 +1,8 @@ -{{ config(enabled=var('rag__using_hubspot', True)) }} +{% set model_enabled = ( + var('rag__using_hubspot', True) + and var('should_include_deal', True) +) %} +{{ config(enabled=model_enabled) }} with deal_document as ( @@ -18,18 +22,19 @@ final as ( cast(deal_document.deal_id as {{ dbt.type_string() }}) as document_id, coalesce(deal_document.title, grouped.title) as title, deal_document.url_reference, + deal_document.companies, 'hubspot' as platform, deal_document.source_relation, grouped.most_recent_chunk_update, - grouped.chunk_index, + coalesce(grouped.chunk_index, 0) as chunk_index, grouped.chunk_tokens as chunk_tokens_approximate, {{ dbt.concat([ "deal_document.comment_markdown", "'\\n\\n## COMMENTS\\n\\n'", - "grouped.comments_group_markdown"]) }} + "coalesce(grouped.comments_group_markdown, '')"]) }} as chunk from deal_document - join grouped + left join grouped on grouped.deal_id = deal_document.deal_id and grouped.source_relation = deal_document.source_relation )