diff --git a/Project.toml b/Project.toml index 6a947e3e8..114e83e81 100644 --- a/Project.toml +++ b/Project.toml @@ -47,7 +47,7 @@ DataAPI = "1" Dictionaries = "0.3, 0.4" DynamicQuantities = "1" FileIO = "1.1" -GLM = "1.9.1" +GLM = "1.9.2" GeoInterface = "1" GeometryBasics = "0.4.1, 0.5" GridLayoutBase = "0.6, 0.7, 0.8, 0.9, 0.10, 0.11" diff --git a/docs/src/reference/analyses.md b/docs/src/reference/analyses.md index 0a0b30975..068d8a807 100644 --- a/docs/src/reference/analyses.md +++ b/docs/src/reference/analyses.md @@ -152,6 +152,34 @@ specs = data(df) * mapping(:x, :y, color=:a => nonnumeric) * ( draw(specs) ``` +Below are a few examples showing the [`linear`](@ref) interface for performing weighted fits: + +```@example analyses +using Random +Random.seed!(111) +colors = Makie.wong_colors() +df = let + x = 1:5 + y_err = randn(length(x)) + y = x .+ y_err + y_unc = abs.(y_err) + (; x, y, y_unc) +end +specs = data(df) * mapping(:x, :y) * ( + (visual(Scatter) + mapping(:y_unc) * visual(Errorbars)) * visual(; label = "data") + + mapping(; weights = :y_unc) * ( + linear(; weighttype = :fweights, weighttransform = x -> inv.(x .^ 2)) * visual(; color = colors[1], label = "fweights") + + # Other weights available once GLM v2 is released + #linear(; weighttype = :aweights) * visual(; color = colors[2], label = "aweights") + #linear(; weighttype = :pweights) * visual(; color = colors[3], label = "pweights") + ) +) +draw(specs) +``` + +Note that the usual caveats still apply for working with different kinds of weights. See the [StatsBase.jl documentation](https://juliastats.org/StatsBase.jl/stable/weights/) for more. + ## Smoothing ```@docs diff --git a/src/transformations/linear.jl b/src/transformations/linear.jl index 793a95253..0e4325afb 100644 --- a/src/transformations/linear.jl +++ b/src/transformations/linear.jl @@ -3,6 +3,9 @@ Base.@kwdef struct LinearAnalysis{I} dropcollinear::Bool = false interval::I = automatic level::Float64 = 0.95 + weighttype::Symbol = :fweights + weighttransform = identity + distr::GLM.Distribution = GLM.Normal() end function add_intercept_column(x::AbstractVector{T}) where {T} @@ -12,15 +15,43 @@ function add_intercept_column(x::AbstractVector{T}) where {T} return mat end +function get_weighttype(s::Symbol) + weighttype = if s == :fweights + StatsBase.fweights + else + throw(ArgumentError("Currently, GLM.jl only supports `StatsBase.fweights`.")) + end + + # TODO: Can support these weights as well after GLM v2 is released + # https://github.com/JuliaStats/GLM.jl/pull/619 + #weighttype = if s == :aweights + # StatsBase.aweights + #elseif s == :pweights + # StatsBase.pweights + #elseif s == :fweights + # StatsBase.fweights + #else + # throw(ArgumentError("Currently, GLM.jl only supports `aweights`, `pweights`, and `fweights`.")) + #end + + return weighttype +end + # TODO: add multidimensional version function (l::LinearAnalysis)(input::ProcessedLayer) output = map(input) do p, n x, y = p - weights = StatsBase.fweights(get(n, :weights, similar(x, 0))) - default_interval = length(weights) > 0 ? nothing : :confidence - interval = l.interval === automatic ? default_interval : l.interval + weights = (get_weighttype(l.weighttype) ∘ l.weighttransform)(get(n, :weights, similar(x, 0))) + interval = l.interval === automatic ? :confidence : l.interval # FIXME: handle collinear case gracefully - lin_model = GLM.lm(add_intercept_column(x), y; weights, l.dropcollinear) + # TODO: `wts` --> `weights` after GLM v2 is released + # https://github.com/JuliaStats/GLM.jl/pull/631 + lin_model = if isempty(weights) + GLM.lm(add_intercept_column(x), y; l.dropcollinear) + else + # Supports confidence intervals, while `GLM.lm` currently does not + GLM.glm(add_intercept_column(x), y, l.distr; weights, l.dropcollinear) + end x̂ = range(extrema(x)..., length = l.npoints) pred = GLM.predict(lin_model, add_intercept_column(x̂); interval, l.level) return if !isnothing(interval) @@ -50,7 +81,7 @@ function (l::LinearAnalysis)(input::ProcessedLayer) end """ - linear(; interval=automatic, level=0.95, dropcollinear=false, npoints=200) + linear(; interval=automatic, level=0.95, dropcollinear=false, npoints=200, weighttype=:fweights, weighttransform=identity, distr=GLM.Normal()) Compute a linear fit of `y ~ 1 + x`. An optional named mapping `weights` determines the weights. Use `interval` to specify what type of interval the shaded band should represent, @@ -63,6 +94,11 @@ it is possible to set `dropcollinear=true`. `npoints` is the number of points used by Makie to draw the shaded band. Weighted data is supported via the keyword `weights` (passed to `mapping`). +Additional weight support is provided via the `weighttype`, `weighttransform`, and `distr` keywords. +`weightype` specifies the `StatsBase.AbstractWeights` type to use. +`weighttransform` accepts an optional function to transform the weights before they are passed to `GLM.glm`. +`distr` is forwarded to `GLM.glm`. +See the GLM.jl documentation for more on working with weighted data. This transformation creates two `ProcessedLayer`s labelled `:prediction` and `:ci`, which can be styled separately with `[subvisual](@ref)`. """