diff --git a/README.md b/README.md
index aab7c45..d840eaf 100644
--- a/README.md
+++ b/README.md
@@ -113,3 +113,8 @@ If you want to include any of these files (or a variation or modification thereo
## Acknowledgements
This research has received funding from the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation programme under grant agreement No. 670035 (project [*Con Espressione*](https://www.jku.at/en/institute-of-computational-perception/research/projects/con-espressione/)).
+
+
+
+
+
diff --git a/basismixer/__init__.py b/basismixer/__init__.py
index a2f4efa..3322e4e 100644
--- a/basismixer/__init__.py
+++ b/basismixer/__init__.py
@@ -11,8 +11,7 @@
OnsetwiseDecompositionDynamicsCodec,
TimeCodec,
)
-from basismixer.basisfunctions import make_basis
-# from basismixer.data import make_dataset
+
from basismixer.data import make_datasets
# define a version variable
diff --git a/basismixer/assets/perfwise_insertions_deletions.ods b/basismixer/assets/perfwise_insertions_deletions.ods
new file mode 100644
index 0000000..11016f6
Binary files /dev/null and b/basismixer/assets/perfwise_insertions_deletions.ods differ
diff --git a/basismixer/basisfunctions.py b/basismixer/basisfunctions.py
deleted file mode 100644
index da26c1f..0000000
--- a/basismixer/basisfunctions.py
+++ /dev/null
@@ -1,1419 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import logging
-import numpy as np
-from scipy.interpolate import interp1d
-import types
-
-import partitura.score as score
-
-LOGGER = logging.getLogger(__name__)
-
-
-class InvalidBasisException(Exception):
- pass
-
-
-def print_basis_functions():
- """Print a list of all basisfunction names defined in this module,
- with descriptions where available.
-
- """
- module = sys.modules[__name__]
- doc_indent = 4
- for name in list_basis_functions():
- print('* {}'.format(name))
- member = getattr(sys.modules[__name__], name)
- if member.__doc__:
-            print(' ' * doc_indent + member.__doc__.replace('\n', '\n' + ' ' * doc_indent))
-
-
-def list_basis_functions():
- """Return a list of all basisfunction names defined in this module.
-
- The basisfunction names listed here can be specified by name in
- the `make_basis` function. For example:
-
- >>> basis, names = make_basis(part, ['metrical_basis', 'articulation_basis'])
-
- Returns
- -------
- list
- A list of strings
-
- """
- module = sys.modules[__name__]
- bfs = []
- exclude = {'make_basis'}
- for name in dir(module):
- if name in exclude:
- continue
- member = getattr(sys.modules[__name__], name)
- if isinstance(member, types.FunctionType) and name.endswith('_basis'):
- bfs.append(name)
- return bfs
-
-
-def make_basis(part, basis_functions):
- """Compute the specified basis functions for a part.
-
-    The function returns the computed basis functions as an N x M
-    array, where N equals `len(part.notes_tied)` and M equals the
-    total number of descriptors over all specified basis functions.
-
-    Furthermore the function returns the names of the basis functions
-    as a list of M strings. Each name consists of the name of the
-    function prepended to the name of the descriptor. For example if a
- function named `abc_basis` returns descriptors `a`, `b`, and `c`,
- then the list of names returned by `make_basis(part,
- ['abc_basis'])` will be ['abc_basis.a', 'abc_basis.b',
- 'abc_basis.c'].
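-
-    For instance, `duration_basis` returns the single descriptor
-    `duration`, so (assuming `part` is any score Part):
-
-    >>> basis, names = make_basis(part, ['duration_basis'])
-    >>> names
-    ['duration_basis.duration']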
-
- Parameters
- ----------
- part : Part
- The score as a Part instance
- basis_functions : list
- A list of basis functions. Elements of the list can be either
- the functions themselves or the names of a basis function as
- strings (or a mix). The basis functions specified by name are
- looked up in the `basismixer.basisfunctions` module.
-
- Returns
- -------
- basis : ndarray
- The basis functions
- names : list
- The basis names
-
- """
-
- acc = []
-
- for bf in basis_functions:
-
- if isinstance(bf, str):
- # get function by name from module
- func = getattr(sys.modules[__name__], bf)
- elif isinstance(bf, types.FunctionType):
- func = bf
-        else:
-            LOGGER.warning('Ignoring unknown basis function {}'.format(bf))
-            continue
-
-        bf, bn = func(part)
-
- # check if the size and number of the basis function are correct
- if bf.shape[1] != len(bn):
- msg = ('number of basis names {} does not equal '
- 'number of basis {}'.format(len(bn), bf.shape[1]))
- raise InvalidBasisException(msg)
- n_notes = len(part.notes_tied)
- if len(bf) != n_notes:
- msg = ('length of basis {} does not equal '
- 'number of notes {}'.format(len(bf), n_notes))
- raise InvalidBasisException(msg)
-
- if np.any(np.logical_or(np.isnan(bf), np.isinf(bf))):
- problematic = np.unique(np.where(np.logical_or(np.isnan(bf), np.isinf(bf)))[1])
- msg = ('NaNs or Infs found in the following basis: {} '
- .format(', '.join(np.array(bn)[problematic])))
- raise InvalidBasisException(msg)
-
- # prefix basis names by function name
- bn = ['{}.{}'.format(func.__name__, n) for n in bn]
-
- acc.append((bf, bn))
-
- _data, _names = zip(*acc)
- basis_data = np.column_stack(_data)
- basis_names = [n for ns in _names for n in ns]
- return basis_data, basis_names
-
-
-def polynomial_pitch_basis(part):
- """Polynomial pitch basis.
-
- Returns:
- * pitch : the midi pitch of the note
- * pitch^2 : the square of the midi pitch
- * pitch^3 : the power of 3 of the midi pitch
-
- """
-
- basis_names = ['pitch', 'pitch^2', 'pitch^3']
- max_pitch = 127
- pitches = np.array(
-        [n.midi_pitch for n in part.notes_tied]).astype(float)
- W = np.column_stack((pitches / max_pitch,
- pitches**2 / max_pitch**2,
- pitches**3 / max_pitch**3))
-
- return W, basis_names
-
-
-def duration_basis(part):
- """Duration basis.
-
- Returns:
- * duration : the duration of the note
-
- """
-
- basis_names = ['duration']
-
- nd = np.array([(n.start.t, n.end_tied.t) for n in part.notes_tied])
- bm = part.beat_map
-
- durations_beat = bm(nd[:, 1]) - bm(nd[:, 0])
- W = durations_beat
- W.shape = (-1, 1)
- return W, basis_names
-
-def onset_basis(part):
- """Onset basis
-
- Returns:
- * onset : the onset of the note in beats
- * score_position : position of the note in the score between 0 (the beginning of the piece) and 1 (the end of the piece)
-
- TODO:
- * rel_position_repetition
- """
- basis_names = ['onset', 'score_position']
-
- onsets = np.array([n.start.t for n in part.notes_tied])
- bm = part.beat_map
- onsets_beat = bm(onsets)
- rel_position = normalize(onsets_beat, method='minmax')
-
- W = np.column_stack((onsets_beat, rel_position))
-
- return W, basis_names
-
-def relative_score_position_basis(part):
- W, names = onset_basis(part)
- return W[:, 1:], names[1:]
-
-
-def grace_basis(part):
- """Grace basis.
-
- Returns:
- * grace_note : 1 when the note is a grace note, 0 otherwise
- * n_grace : the length of the grace note sequence to which
- this note belongs (0 for non-grace notes)
- * grace_pos : the (1-based) position of the grace note in
- the sequence (0 for non-grace notes)
-
- """
-
- basis_names = ['grace_note', 'n_grace', 'grace_pos']
-
- notes = part.notes_tied
- W = np.zeros((len(notes), 3))
- for i, n in enumerate(notes):
- grace = isinstance(n, score.GraceNote)
- if grace:
- n_grace = n.grace_seq_len
- W[i, 0] = 1
- W[i, 1] = n_grace
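-            # 1-based position in the sequence: assuming iter_grace_seq
-            # yields the remaining notes from this one to the end, the
-            # position is the sequence length minus that count plus one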
- W[i, 2] = n_grace - sum(1 for _ in n.iter_grace_seq()) + 1
-
- return W, basis_names
-
-
-def loudness_direction_basis(part):
- """The loudness directions in part.
-
- This function returns a varying number of descriptors, depending
- on which directions are present. Some directions are grouped
- together. For example 'decrescendo' and 'diminuendo' are encoded
- together in a descriptor 'loudness_decr'. The descriptor names of
- textual directions such as 'adagio' are the verbatim directions.
-
- Some possible descriptors:
- * p : piano
- * f : forte
- * pp : pianissimo
- * loudness_incr : crescendo direction
- * loudness_decr : decrescendo or diminuendo direction
-
- """
-
- onsets = np.array([n.start.t for n in part.notes_tied])
- N = len(onsets)
-
- directions = list(part.iter_all(
- score.LoudnessDirection, include_subclasses=True))
-
- def to_name(d):
- if isinstance(d, score.ConstantLoudnessDirection):
- return d.text
- elif isinstance(d, score.ImpulsiveLoudnessDirection):
- return d.text
- elif isinstance(d, score.IncreasingLoudnessDirection):
- return 'loudness_incr'
- elif isinstance(d, score.DecreasingLoudnessDirection):
- return 'loudness_decr'
-
- basis_by_name = {}
- for d in directions:
- j, bf = basis_by_name.setdefault(to_name(d),
- (len(basis_by_name), np.zeros(N)))
- bf += basis_function_activation(d)(onsets)
-
- W = np.empty((len(onsets), len(basis_by_name)))
- names = [None] * len(basis_by_name)
- for name, (j, bf) in basis_by_name.items():
- W[:, j] = bf
- names[j] = name
-
- return W, names
-
-
-def tempo_direction_basis(part):
- """The tempo directions in part.
-
- This function returns a varying number of descriptors, depending
- on which directions are present. Some directions are grouped
- together. For example 'adagio' and 'molto adagio' are encoded
- together in a descriptor 'adagio'.
-
- Some possible descriptors:
- * adagio : directions like 'adagio', 'molto adagio'
-
- """
- onsets = np.array([n.start.t for n in part.notes_tied])
- N = len(onsets)
-
- directions = list(part.iter_all(
- score.TempoDirection, include_subclasses=True))
-
- def to_name(d):
- if isinstance(d, score.ResetTempoDirection):
- ref = d.reference_tempo
- if ref:
- return ref.text
- else:
- return d.text
- elif isinstance(d, score.ConstantTempoDirection):
- return d.text
- elif isinstance(d, score.IncreasingTempoDirection):
- return 'tempo_incr'
- elif isinstance(d, score.DecreasingTempoDirection):
- return 'tempo_decr'
-
- basis_by_name = {}
- for d in directions:
- j, bf = basis_by_name.setdefault(to_name(d),
- (len(basis_by_name), np.zeros(N)))
- bf += basis_function_activation(d)(onsets)
-
- W = np.empty((len(onsets), len(basis_by_name)))
- names = [None] * len(basis_by_name)
- for name, (j, bf) in basis_by_name.items():
- W[:, j] = bf
- names[j] = name
-
- return W, names
-
-
-def articulation_direction_basis(part):
- """
- """
- onsets = np.array([n.start.t for n in part.notes_tied])
- N = len(onsets)
-
- directions = list(part.iter_all(
- score.ArticulationDirection, include_subclasses=True))
-
- def to_name(d):
- return d.text
-
- basis_by_name = {}
-
- for d in directions:
-
- j, bf = basis_by_name.setdefault(to_name(d),
- (len(basis_by_name), np.zeros(N)))
- bf += basis_function_activation(d)(onsets)
-
- W = np.empty((len(onsets), len(basis_by_name)))
- names = [None] * len(basis_by_name)
-
- for name, (j, bf) in basis_by_name.items():
-
- W[:, j] = bf
- names[j] = name
-
- return W, names
-
-
-def basis_function_activation(direction):
- epsilon = 1e-6
-
- if isinstance(direction, (score.DynamicLoudnessDirection,
- score.DynamicTempoDirection)):
- # a dynamic direction will be encoded as a ramp from d.start.t to
- # d.end.t, and then a step from d.end.t to the start of the next
- # constant direction.
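-        # For example, a crescendo spanning t=10..20 followed by a constant
-        # 'f' at t=30 yields breakpoints x=[10, 20-eps, 30-eps] with
-        # activations y=[0, 1, 1].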
-
- # There are two potential issues:
-
-        # Issue 1. d.end is None (e.g. just a ritardando without dashes);
-        # in that case we infer an end time:
- if direction.end:
- direction_end = direction.end.t
- else:
- # assume the end of d is the end of the measure:
- measure = next(direction.start.iter_prev(score.Measure, eq=True), None)
- if measure:
-                direction_end = measure.end.t
- else:
- # no measure, unlikely, but not impossible.
- direction_end = direction.start.t
-
- if isinstance(direction, score.TempoDirection):
- next_dir = next(direction.start.iter_next(
- score.ConstantTempoDirection), None)
-        elif isinstance(direction, score.ArticulationDirection):
- next_dir = next(direction.start.iter_next(
- score.ConstantArticulationDirection), None)
- else:
- next_dir = next(direction.start.iter_next(
- score.ConstantLoudnessDirection), None)
-
- if next_dir:
- # TODO: what do we do when next_dir is too far away?
- sustained_end = next_dir.start.t
- else:
- # Issue 2. there is no next constant direction. In that case the
- # basis function will be a ramp with a quarter note ramp
- sustained_end = direction_end + direction.start.quarter
-
- x = [direction.start.t,
- direction_end - epsilon,
- sustained_end - epsilon]
- y = [0, 1, 1]
-
- elif isinstance(direction, (score.ConstantLoudnessDirection,
- score.ConstantArticulationDirection,
- score.ConstantTempoDirection)):
- x = [direction.start.t - epsilon,
- direction.start.t,
- direction.end.t - epsilon,
- direction.end.t]
- y = [0, 1, 1, 0]
-
- else: # impulsive
- x = [direction.start.t - epsilon,
- direction.start.t,
- direction.start.t + epsilon]
- y = [0, 1, 0]
-
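-    # piecewise-linear interpolation between the breakpoints; with
-    # bounds_error=False and fill_value=0 the activation is zero outside
-    # its support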
- return interp1d(x, y, bounds_error=False, fill_value=0)
-
-
-def slur_basis(part):
- """Slur basis.
-
- Returns:
- * slur_incr : a ramp function that increases from 0
- to 1 over the course of the slur
- * slur_decr : a ramp function that decreases from 1
- to 0 over the course of the slur
-
- """
- names = ['slur_incr', 'slur_decr']
- onsets = np.array([n.start.t for n in part.notes_tied])
- slurs = part.iter_all(score.Slur)
- W = np.zeros((len(onsets), 2))
-
- for slur in slurs:
- if not slur.end:
- continue
- x = [slur.start.t, slur.end.t]
- y_inc = [0, 1]
- y_dec = [1, 0]
- W[:, 0] += interp1d(x, y_inc, bounds_error=False, fill_value=0)(onsets)
- W[:, 1] += interp1d(x, y_dec, bounds_error=False, fill_value=0)(onsets)
-
- return W, names
-
-
-def articulation_basis(part):
- """Articulation basis.
-
- This basis returns articulation-related note annotations, such as accents, legato, and tenuto.
-
- Possible descriptors:
- * accent : 1 when the note has an annotated accent sign
- * legato : 1 when the note has an annotated legato sign
- * staccato : 1 when the note has an annotated staccato sign
- ...
-
- """
- names = ['accent', 'strong-accent', 'staccato', 'tenuto',
- 'detached-legato', 'staccatissimo', 'spiccato',
- 'scoop', 'plop', 'doit', 'falloff', 'breath-mark',
- 'caesura', 'stress', 'unstress', 'soft-accent']
- basis_by_name = {}
- notes = part.notes_tied
- N = len(notes)
- for i, n in enumerate(notes):
- if n.articulations:
- for art in n.articulations:
- if art in names:
- j, bf = basis_by_name.setdefault(
- art,
- (len(basis_by_name), np.zeros(N)))
- bf[i] = 1
-
- M = len(basis_by_name)
- W = np.empty((N, M))
- names = [None] * M
-
- for name, (j, bf) in basis_by_name.items():
- W[:, j] = bf
- names[j] = name
-
- return W, names
-
-# # for a subset of the articulations do e.g.
-# def staccato_basis(part):
-# W, names = articulation_basis(part)
-# if 'staccato' in names:
-# i = names.index('staccato')
-# return W[:, i:i + 1], ['staccato']
-# else:
-# return np.empty(len(W)), []
-
-
-def fermata_basis(part):
- """Fermata basis.
-
- Returns:
- * fermata : 1 when the note coincides with a fermata sign.
-
- """
- names = ['fermata']
- onsets = np.array([n.start.t for n in part.notes_tied])
- W = np.zeros((len(onsets), 1))
- for ferm in part.iter_all(score.Fermata):
- W[onsets == ferm.start.t, 0] = 1
- return W, names
-
-
-def metrical_basis(part):
- """Metrical basis
-
- This basis encodes the metrical position in the bar. For example
- the first beat in a 3/4 meter is encoded in a binary descriptor
- 'metrical_3_4_0', the fifth beat in a 6/8 meter as
- 'metrical_6_8_4', etc. Any positions that do not fall on a beat
- are encoded in a basis suffixed '_weak'. For example a note
- starting on the second 8th note in a bar of 4/4 meter will have a
- non-zero value in the 'metrical_4_4_weak' descriptor.
-
- """
- notes = part.notes_tied
- ts_map = part.time_signature_map
- bm = part.beat_map
- basis_by_name = {}
- eps = 10**-6
-
- for i, n in enumerate(notes):
-
- beats, beat_type = ts_map(n.start.t).astype(int)
- measure = next(n.start.iter_prev(score.Measure, eq=True), None)
-
- if measure:
- measure_start = measure.start.t
- else:
- measure_start = 0
-
- pos = bm(n.start.t) - bm(measure_start)
-
- if pos % 1 < eps:
- name = 'metrical_{}_{}_{}'.format(beats, beat_type, int(pos))
- else:
- name = 'metrical_{}_{}_weak'.format(beats, beat_type)
-
- j, bf = basis_by_name.setdefault(name,
- (len(basis_by_name), np.zeros(len(notes))))
- bf[i] = 1
-
- W = np.empty((len(notes), len(basis_by_name)))
- names = [None] * len(basis_by_name)
- for name, (j, bf) in basis_by_name.items():
- W[:, j] = bf
- names[j] = name
-
- return W, names
-
-def metrical_strength_basis(part):
- """Metrical strength basis
-
- This basis encodes the beat phase (relative position of a note within
- the measure), as well as metrical strength of common time signatures.
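-
-    For example, in 4/4 a note on the third beat (position 2 within the
-    measure) gets beat_phase = 2/4 = 0.5 and
-    metrical_strength_secondary = 1.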
- """
- notes = part.notes_tied
- ts_map = part.time_signature_map
- bm = part.beat_map
-
- names = ['beat_phase',
- 'metrical_strength_downbeat',
- 'metrical_strength_secondary',
- 'metrical_strength_weak']
-
- W = np.zeros((len(notes), len(names)))
- for i, n in enumerate(notes):
-
- beats, beat_type = ts_map(n.start.t).astype(int)
- measure = next(n.start.iter_prev(score.Measure, eq=True), None)
-
- if beats == 4:
- # for 4/4
- sec_beat = 2
- elif beats == 6:
- # for 6/8
- sec_beat = 3
- elif beats == 12:
- # for 12/8
- sec_beat = 6
- else:
- sec_beat = None
-
- if measure:
- measure_start = measure.start.t
- else:
- measure_start = 0
-
- pos = bm(n.start.t) - bm(measure_start)
-
- m_pos = np.mod(pos, beats)
-
- W[i, 0] = m_pos / beats
-
- if m_pos == 0:
- W[i, 1] = 1
- elif m_pos == sec_beat:
- W[i, 2] = 1
- else:
- W[i, 3] = 1
-
- return W, names
-
-def time_signature_basis(part):
- """TIme Signature basis
- This basis encodes the time signature of the note in two sets of one-hot vectors,
- a one hot encoding of number of beats and a one hot encoding of beat type
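-
-    For example, a note under a 3/4 time signature gets
-    time_signature_num_3 = 1 and time_signature_den_4 = 1, with all other
-    entries 0.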
- """
-
- notes = part.notes_tied
- ts_map = part.time_signature_map
- possible_beats = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 'other']
- possible_beat_types = [1, 2, 4, 8, 16, 'other']
- W_beats = np.zeros((len(notes), len(possible_beats)))
- W_types = np.zeros((len(notes), len(possible_beat_types)))
-
- names = (['time_signature_num_{0}'.format(b) for b in possible_beats] +
- ['time_signature_den_{0}'.format(b) for b in possible_beat_types])
-
- for i, n in enumerate(notes):
- beats, beat_type = ts_map(n.start.t).astype(int)
-
- if beats in possible_beats:
- W_beats[i, beats - 1] = 1
- else:
- W_beats[i, -1] = 1
-
- if beat_type in possible_beat_types:
- W_types[i, possible_beat_types.index(beat_type)] = 1
- else:
- W_types[i, -1] = 1
-
- W = np.column_stack((W_beats, W_types))
-
- return W, names
-
-def vertical_neighbor_basis(part):
- """Vertical neighbor basis.
-
- Describes various aspects of simultaneously starting notes.
-
- Returns:
-    * n_total : the number of simultaneously starting notes (including
-      the note itself)
-    * n_above : the number of simultaneous notes with a higher pitch
-    * n_below : the number of simultaneous notes with a lower pitch
-    * highest_pitch : the highest pitch among the simultaneous notes
-    * lowest_pitch : the lowest pitch among the simultaneous notes
-    * pitch_range : the difference between the highest and lowest pitch
-
- """
- # the list of descriptors
- names = ['n_total', 'n_above', 'n_below',
- 'highest_pitch', 'lowest_pitch', 'pitch_range']
- # notes
- notes = part.notes_tied
-
- W = np.empty((len(notes), len(names)))
-    for i, n in enumerate(notes):
-        neighbors = np.array([m.midi_pitch for m in
-                              n.start.starting_objects[score.Note]])
- max_pitch = np.max(neighbors)
- min_pitch = np.min(neighbors)
- W[i, 0] = len(neighbors)
- W[i, 1] = np.sum(neighbors > n.midi_pitch)
- W[i, 2] = np.sum(neighbors < n.midi_pitch)
- W[i, 3] = max_pitch
- W[i, 4] = min_pitch
- W[i, 5] = max_pitch - min_pitch
- return W, names
-
-
-def normalize(data, method='minmax'):
- """
- Normalize data in one of several ways.
-
- The available normalization methods are:
-
- * minmax
- Rescale `data` to the range `[0, 1]` by subtracting the minimum
- and dividing by the range. If `data` is a 2d array, each column is
- rescaled to `[0, 1]`.
-
- * tanh
- Rescale `data` to the interval `(-1, 1)` using `tanh`. Note that
- if `data` is non-negative, the output interval will be `[0, 1)`.
-
- * tanh_unity
- Like "soft", but rather than rescaling strictly to the range (-1,
- 1), following will hold:
-
- normalized = normalize(data, method="tanh_unity")
- np.where(data==1) == np.where(normalized==1)
-
- That is, the normalized data will equal one wherever the original data
- equals one. The target interval is `(-1/np.tanh(1), 1/np.tanh(1))`.
-
- Parameters
- ----------
- data: ndarray
- Data to be normalized
- method: {'minmax', 'tanh', 'tanh_unity'}, optional
- The normalization method. Defaults to 'minmax'.
-
- Returns
- -------
- ndarray
- Normalized copy of the data
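-
-    Examples
-    --------
-    A minimal sketch of the default minmax scaling:
-
-    >>> normalize(np.array([1., 2., 3.]))
-    array([0. , 0.5, 1. ])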
- """
-
- """Normalize the data in `data`. There are several normalization
-
- """
- if method == 'minmax':
- vmin = np.min(data, 0)
- vmax = np.max(data, 0)
-
-        if np.all(np.isclose(vmin, vmax)):
- # Return all values as 0 or as 1?
- return np.zeros_like(data)
- else:
- return (data - vmin) / (vmax - vmin)
- elif method == 'tanh':
- return np.tanh(data)
- elif method == 'tanh_unity':
- return np.tanh(data) / np.tanh(1)
-
-
-# from extra.utils.data_utils import smooth
-# from extra.data_handling.sparse_feature_extraction import (
-# scorepart_to_notes,
-# notes_to_notecentered_pianoroll,
-# notes_to_pianoroll_note_slices,
-# )
-# from music_utils.key_id.key_identification import (
-# key_identification,
-# key_to_scaledegree,
-# fifths_to_key,
-# SCALE_DEGREES,
-# KEYS)
-
-# # from extra.data_handling.annotation_tokenizer import tokenizer, TokenizeException
-
-# class NoteCenteredPianoRollBasis(Basis):
-# # lowest_pitch = 21
-# # highest_pitch = 108
-# neighbour_pitches = 36
-# neighbour_beats = 8
-# beat_div = 8
-# names = ['{0}'.format(i) for i in
-# range((2 * neighbour_pitches + 1) * (2 * neighbour_beats * beat_div))]
-
-# @classmethod
-# def makeBasis(cls, score_part):
-# notes, idx = scorepart_to_notes(score_part)
-# W = notes_to_notecentered_pianoroll(
-# notes, onset_only=False,
-# neighbour_pitches=cls.neighbour_pitches,
-# neighbour_beats=cls.neighbour_beats,
-# beat_div=cls.beat_div)
-# # print('pitch span', r)
-# return FeatureBasis(W, cls.make_full_names())
-
-
-# def scorepart_to_onsetwise_pianoroll(score_part, morphetic_pitch=False, return_ioi=False):
-# notes, _ = scorepart_to_notes(score_part, morphetic_pitch)
-# start, end = notes[0, 0], notes[-1, 0]
-# return notes_to_pianoroll_note_slices(notes, return_ioi=return_ioi)
-
-
-# def zero_mean_pianoroll(X):
-# t, p = np.nonzero(X)
-# center = 64
-# Z = np.zeros_like(X)
-# for i, t_i in enumerate(unique_onset_idx(t)):
-# avg_pitch = int(np.round(np.mean(p[t_i])))
-# new_pitches = p[t_i] - avg_pitch + center
-# try:
-# Z[t[t_i], new_pitches] = 1
-# except IndexError:
-# new_pitches[new_pitches < 0 ] = 0
-# new_pitches[new_pitches >= Z.shape[1] ] = Z.shape[1] - 1
-# Z[t[t_i], new_pitches] = 1
-# return Z
-
-# class SelfSimilarityBasis(Basis):
-# _filter_sizes_abs = (5, 10, 20, 50, 100)
-# _max_prop = .7
-# _filter_sizes_prop = (.005, .01, .05, .1, .2, .3, .5)
-# names = (['abs_{}'.format(x) for x in _filter_sizes_abs] +
-# ['prop_{}'.format(x) for x in _filter_sizes_prop] +
-# ['centered_abs_{}'.format(x) for x in _filter_sizes_abs] +
-# ['centered_prop_{}'.format(x) for x in _filter_sizes_prop])
-
-# @classmethod
-# def makeBasis(cls, score_part):
-# pr = scorepart_to_onsetwise_pianoroll(score_part, morphetic_pitch=True).toarray()
-# onsets = np.array([n.start.t for n in score_part.notes])
-# uox = unique_onset_idx(onsets)
-
-# N = len(onsets)
-# # N x 128
-# X_n = pr.T[np.array([x[0] for x in uox])]
-# pr = None
-# X = np.corrcoef(X_n)
-# X[np.isnan(X)] = 0
-
-# names = []
-# W = np.empty((N, 0))
-
-# W_abs, k_abs = self_sim_conv_absolute(X, K=cls._filter_sizes_abs, max_prop=cls._max_prop)
-# if len(k_abs) > 0:
-# names.extend(['abs_{}'.format(x) for x in k_abs])
-# W = np.column_stack((W, expand_array(W_abs, uox, N)))
-
-# W_prop, k_prop = self_sim_conv_proportional(X, K=cls._filter_sizes_prop)
-# if len(k_prop) > 0:
-# names.extend(['prop_{}'.format(x) for x in k_prop])
-# W = np.column_stack((W, expand_array(W_prop, uox, N)))
-
-# X_n = zero_mean_pianoroll(X_n)
-
-# X = np.corrcoef(X_n)
-# X[np.isnan(X)] = 0
-
-# W_abs, k_abs = self_sim_conv_absolute(X, K=cls._filter_sizes_abs, max_prop=cls._max_prop)
-# if len(k_abs) > 0:
-# names.extend(['centered_abs_{}'.format(x) for x in k_abs])
-# W = np.column_stack((W, expand_array(W_abs, uox, N)))
-
-# W_prop, k_prop = self_sim_conv_proportional(X, K=cls._filter_sizes_prop)
-# if len(k_prop) > 0:
-# names.extend(['centered_prop_{}'.format(x) for x in k_prop])
-# W = np.column_stack((W, expand_array(W_prop, uox, N)))
-
-# return FeatureBasis(normalize(W), cls.make_full_names(names))
-
-# def expand_array(x, idx, N):
-# """
-# Given an array `x` and a list of grouped indices `idx`, return a new array `y`,
-# where the values of `x` are duplicated according to `idx`, such that:
-
-# y[idx[i]] = x[i], where idx[i] is an array of integers
-
-# This function is a convenience function to duplicate onsetwise features (`x`) to
-# obtain notewise features (`y`).
-
-# Argument `N` is the length of the output array.
-
-# Warning: there are no checks that `N` is consistent with `idx`, and that the
-# values in `idx` fill all of `y`.
-
-# For example: let x = [1, 2, 3] and idx = [[0, 1], [2], [3, 4]], (and N = 5,
-# redundantly), then y = [1, 1, 2, 3, 3]
-
-# Parameters
-# ----------
-# x: ndarray
-# Array with values (can be multidimensional)
-# idx: list
-# List of index-arrays
-# N: int
-# Size of the expanded array
-
-# Returns
-# -------
-# ndarray
-# Expanded array
-# """
-
-
-# s = tuple([N] + list(x.shape)[1:])
-# y = np.empty(s)
-# for v, i in zip(x, idx):
-# y[i] = v
-# return y
-
-# class PianorollBasis(Basis):
-# names = ['{0}'.format(i) for i in
-# range(128)] + ['log2_duration']
-
-# @classmethod
-# def makeBasis(cls, score_part):
-# W, ioi = scorepart_to_onsetwise_pianoroll(score_part, return_ioi=True)
-# W = W.T.toarray()
-# # print(W.shape, ioi.shape)
-# # print(np.unique(np.sort(ioi)))
-# assert np.sum(np.sum(W, 1) > 0) == W.shape[0]
-# W = np.column_stack((W, np.log2(ioi)))
-# return FeatureBasis(soft_normalize(W), cls.make_full_names())
-
-# class ExtremePitchBasis(Basis):
-
-# """
-# This basis computes the highest and lowest pitch at each score position
-# Each row in the resulting matrix corresponds to a note in the score and
-# contains the highest and lowest pitch of the score position to which
-# the note belongs (i.e. for the same extreme pitches will appear for all
-# notes that belong to the same score position.
-
-# highestpitch : highest pitch of each score position
-# lowestpitch : lowest pitch of each score position
-# """
-# names = ['highestpitch', 'lowestpitch']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-
-# Q = 127.0
-# # Pitches and onsets
-# p_o = np.array([(n.midi_pitch, n.start.t) for n in scorePart.notes])
-
-# unique_onsets = np.unique(p_o[:, 1])
-
-# unique_onset_idxs = [np.where(p_o[:, 1] == u)[0] for u in unique_onsets]
-
-# pitches = [p_o[ix, 0] for ix in unique_onset_idxs]
-
-# W = np.zeros((len(p_o), 2))
-
-# for u, p in zip(unique_onset_idxs, pitches):
-# W[u, 0] = p.max() / Q
-# W[u, 1] = p.min() / Q
-
-# return FeatureBasis(W, cls.make_full_names())
-
-
-# class VerticalIntervalClassBasis(Basis):
-# """
-# Three features describing up to three vertical interval classes
-# above the bass, i.e. the intervals between the notes of a chord and
-# the lowest pitch excluding pitch class repetition and octaves
-
-# vertical_intervals_{1,2,3}
-# """
-
-# names = ['vertical_interval_class_1',
-# 'vertical_interval_class_2',
-# 'vertical_interval_class_3']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-
-# Q = 11.0
-# # Pitches and onsets
-# p_o = np.array([(n.midi_pitch, n.start.t) for n in scorePart.notes])
-
-# # get unique onsets
-# unique_onsets = np.unique(p_o[:, 1])
-
-# # get unique_onset_idxs
-# unique_onset_idxs = [np.where(p_o[:, 1] == u)[0] for u in unique_onsets]
-
-# pitches = [p_o[ix, 0] for ix in unique_onset_idxs]
-
-# W = np.zeros((len(p_o), 3))
-
-# for u, p in zip(unique_onset_idxs, pitches):
-# # Vertical interval class combination
-# pitch_classes = np.unique(np.mod(p, 12))
-# vertical_intervals = pitch_classes - pitch_classes.min()
-# vertical_intervals.sort()
-
-# # Normalize the vintc to lie between 0 and 1
-# W[u, :len(vertical_intervals[slice(1, 4)])] = (
-# vertical_intervals[slice(1, 4)]) / Q
-
-# return FeatureBasis(W, cls.make_full_names())
-
-
-# class VerticalNeighborBasis(Basis):
-
-# """
-# This basis has three members:
-
-# lower_neighbors: the number of simultaneously starting notes with lower pitches
-# upper_neighbors: the number of simultaneously starting notes with higher pitches
-
-# """
-
-# names = ['lower_neighbors', 'upper_neighbors', 'total_neighbors']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-# t_dict = partition(lambda n: n.start.t, scorePart.notes)
-# n_dict = {}
-# for k, v in t_dict.items():
-# v.sort(key=attrgetter('midi_pitch'))
-# N = len(v) - 1
-# for i, n in enumerate(v):
-# n_dict[n] = (i, N - i, N + 1)
-# W = np.array([n_dict[n] for n in scorePart.notes])
-# return FeatureBasis(soft_normalize(W), cls.make_full_names())
-
-
-# class UIOIBasis(Basis):
-# """
-# This basis has two members:
-
-#     ioi_prev: the time interval between the current onset (t) and the previous onset (t-1)
-#     ioi_next: the time interval between the current onset (t) and the next onset (t+1)
-
-
-# """
-# names = ['u_ioi_prev', 'u_ioi_next']
-# @classmethod
-# def makeBasis(cls, scorePart):
-# onsets = np.array([n.start.t for n in scorePart.notes])
-# u_onset_idx = unique_onset_idx(onsets)
-# u_onsets = np.array([onsets[ii[0]] for ii in u_onset_idx])
-# # include offset of last note for computing last 'ioi'
-# u_ioi = np.diff(np.r_[u_onsets, scorePart.notes[-1].end.t])
-# u_W = np.column_stack((np.r_[0, u_ioi[:-1]], u_ioi)).astype(np.float)
-# W = np.empty((len(onsets), 2))
-# for i, ii in enumerate(u_onset_idx):
-# W[ii,:] = u_W[i,:]
-# return FeatureBasis(soft_normalize(W, preserve_unity=True), cls.make_full_names())
-
-# class IOIBasis(Basis):
-
-# """
-# This basis has three members:
-
-#     ioi_prev1: the time interval between the current onset (t) and the previous onset (t-1)
-# ioi_prev2: the time interval between (t-1) and (t-2)
-# ioi_prev3: the time interval between (t-2) and (t-3)
-
-# Each of these values is 0 in case there are no prior onsets
-
-# In this basis, the next onset is defined as the next onset that is
-
-# """
-
-# names = ['ioi_prev1', 'ioi_prev2', 'ioi_prev3',
-# 'ioi_next1', 'ioi_next2', 'ioi_next3']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-# t_dict = {}
-# for note in scorePart.notes:
-# pred1 = note.start.get_prev_of_type(Note)
-# if len(pred1) > 1:
-# d1 = note.start.t - pred1[0].start.t
-# pred2 = pred1[0].start.get_prev_of_type(Note)
-# if len(pred2) > 1:
-# d2 = pred1[0].start.t - pred2[0].start.t
-# pred3 = pred2[0].start.get_prev_of_type(Note)
-# if len(pred3) > 1:
-# d3 = pred2[0].start.t - pred3[0].start.t
-# else:
-# d3 = 0
-# else:
-# d2 = 0
-# d3 = 0
-# else:
-# d1 = 0
-# d2 = 0
-# d3 = 0
-
-# succ1 = note.start.get_next_of_type(Note)
-# if len(succ1) > 1:
-# d4 = succ1[0].start.t - note.start.t
-# succ2 = succ1[0].start.get_next_of_type(Note)
-# if len(succ2) > 1:
-# d5 = succ2[0].start.t - succ1[0].start.t
-# succ3 = succ2[0].start.get_next_of_type(Note)
-# if len(succ3) > 1:
-# d6 = succ3[0].start.t - succ2[0].start.t
-# else:
-# d6 = 0
-# else:
-# d5 = 0
-# d6 = 0
-# else:
-# d4 = 0
-# d5 = 0
-# d6 = 0
-
-# t_dict[note.start] = (d1, d2, d3, d4, d5, d6)
-# W = np.array([t_dict[n.start] for n in scorePart.notes])
-# return FeatureBasis(soft_normalize(W, preserve_unity=True), cls.make_full_names())
-
-# # @classmethod
-# # def makeBasis(cls, scorePart):
-# # t_dict = {}
-# # for tp in scorePart.timeline.points:
-# # if tp.prev:
-# # d1 = tp.t - tp.prev.t
-# # if tp.prev.prev:
-# # d2 = tp.prev.t - tp.prev.prev.t
-# # if tp.prev.prev.prev:
-# # d3 = tp.prev.prev.t - tp.prev.prev.prev.t
-# # else:
-# # d3 = 0
-# # else:
-# # d2 = 0
-# # else:
-# # d1 = 0
-# # d2 = 0
-# # d3 = 0
-
-# # if tp.next:
-# # d4 = tp.next.t - tp.t
-# # if tp.next.next:
-# # d5 = tp.next.next.t - tp.next.t
-# # if tp.next.next.next:
-# # d6 = tp.next.next.next.t - tp.next.next.t
-# # else:
-# # d6 = 0
-# # else:
-# # d5 = 0
-# # else:
-# # d4 = 0
-# # d5 = 0
-# # d6 = 0
-
-# # t_dict[tp] = (d1, d2, d3, d4, d5, d6)
-# # W = np.array([t_dict[n.start] for n in scorePart.notes])
-# # return FeatureBasis(normalize(W), cls.make_full_names())
-
-
-# class RitardandoBasis(Basis):
-# names = ['ritardando']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-#         start = scorePart.timeline.points[0].t
-#         end = scorePart.timeline.points[-1].t
-# W = np.array([n.start.t for n in scorePart.notes], dtype=np.float)
-# W = np.exp(((W - start) / (end - start))**100) - 1
-# W.shape = (-1, 1)
-# return FeatureBasis(soft_normalize(W, preserve_unity=True), cls.make_full_names())
-
-
-# class SlurBasis(Basis):
-# names = ['slur_step', 'slur_incr', 'slur_decr']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-# slurs = scorePart.timeline.get_all_of_type(Slur)
-
-# W = np.zeros((len(scorePart.notes), 3), dtype=np.float32)
-
-# if len(slurs) > 0:
-# ss = np.array([(s.voice, s.start.t, s.end.t)
-# for s in slurs
-# if (s.start is not None and
-# s.end is not None)])
-
-# if ss.shape[0] < len(slurs):
-# LOGGER.info("Ignoring {0} of {1} slurs for missing start or end"
-# .format(len(slurs) - ss.shape[0], len(slurs)))
-
-# # begin make arch
-# onsets = np.array([n.start.t for n in scorePart.notes])
-# first = np.min(onsets)
-# last = np.max(onsets)
-# eps = 10**-4
-
-# for v, start, end in ss:
-# tmap = np.array([[min(first, start - eps), 0, 0],
-# [start - eps, 0, 0],
-# [start, 0, 1],
-# [end, 1, 0],
-# [end + eps, 0, 0],
-# [max(last, end + eps), 0, 0]])
-# incr = interp1d(tmap[:, 0], tmap[:, 1])
-# decr = interp1d(tmap[:, 0], tmap[:, 2])
-# W[:, 1] += incr(onsets)
-# W[:, 2] += decr(onsets)
-
-# start_idx = np.argsort(ss[:, 1])
-# end_idx = np.argsort(ss[:, 2])
-
-# ss_start = ss[start_idx,:]
-# ss_end = ss[end_idx,:]
-
-# idx = np.arange(ss.shape[0], dtype=np.int)
-
-# idx_start = idx[start_idx]
-# idx_end = idx[end_idx]
-
-# ndnv = np.array([(n.start.t, n.voice) for n in scorePart.notes])
-
-# start_before = np.searchsorted(
-# ss_start[:, 1], ndnv[:, 0], side='right')
-# end_after = np.searchsorted(ss_end[:, 2], ndnv[:, 0], side='left')
-
-# for i in range(ndnv.shape[0]):
-# spanning = tuple(
-# set(idx_start[:start_before[i]]).intersection(set(idx_end[end_after[i]:])))
-# W[i, 0] = 1 if ndnv[i, 1] in ss[spanning, 0] else 0
-
-# return FeatureBasis(W, cls.make_full_names())
-
-
-# class ScoreTimeBasis(Basis):
-# names = ['beat']
-# @classmethod
-# def makeBasis(cls, scorePart):
-# nd = np.array([n.start.t for n in scorePart.notes])
-# bm = scorePart.beat_map
-# W = bm(nd)
-# W.shape = (-1, 1)
-# return FeatureBasis(W, cls.make_full_names())
-
-# class RestBasis(Basis):
-# names = ['precedes_rest', 'precedes_rest_narrow', 'precedes_rest_mid', 'precedes_rest_wide']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-# smooth_k = 2
-# smooth_k_mid = 6
-# smooth_k_wide = 10
-
-# t_rest = dict((n.start.t, 1 if len(n.end.get_starting_objects_of_type(Note)) == 0 else 0)
-# for n in scorePart.notes)
-
-# t_sorted = sorted(t_rest.keys())
-
-# smoothed = np.column_stack(([t_rest[k] for k in t_sorted],
-# smooth([t_rest[k] for k in t_sorted], smooth_k),
-# smooth([t_rest[k] for k in t_sorted], smooth_k_mid),
-# smooth([t_rest[k] for k in t_sorted], smooth_k_wide)))
-# rest_smooth = dict((k, x) for k, x in zip(t_sorted, smoothed))
-
-# W = np.array([rest_smooth[n.start.t] for n in scorePart.notes])
-# return FeatureBasis(normalize(W), cls.make_full_names())
-
-
-# class RepeatBasis(Basis):
-# names = ['repeat_end', 'repeat_end_short_ramp', 'repeat_end_med_ramp', 'repeat_end_wide_ramp']
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-# smooth_k = 2
-# smooth_k_mid = 6
-# smooth_k_wide = 10
-
-# on_repeat = dict((tp.t, 0 if len(tp.get_ending_objects_of_type(Repeat)) == 0 else 1)
-# for tp in scorePart.timeline.points)
-# on_repeat[scorePart.timeline.points[-1].t] = 1
-# t_sorted = sorted(on_repeat.keys())
-
-# smoothed = np.column_stack((
-# [on_repeat[k] for k in t_sorted],
-# smooth([on_repeat[k] for k in t_sorted], smooth_k),
-# smooth([on_repeat[k] for k in t_sorted], smooth_k_mid),
-# smooth([on_repeat[k] for k in t_sorted], smooth_k_wide)))
-
-# repeat_smooth = dict((k, x) for k, x in zip(t_sorted, smoothed))
-
-# W = np.array([repeat_smooth[n.end.t] for n in scorePart.notes])
-# return FeatureBasis(normalize(W), cls.make_full_names())
-
-
-# class HarmonicBasis(Basis):
-# names = KEYS + SCALE_DEGREES
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-# tl = scorePart.timeline
-# key_sigs = tl.get_all_of_type(KeySignature)
-# measures = tl.get_all_of_type(Measure)
-
-# note_info = np.array([(n.midi_pitch, n.start.t, n.end.t)
-# for n in scorePart.notes])
-
-# bar_onsets = np.array([m.start.t for m in measures])
-
-# key_info = [(ks.fifths, ks.mode, ks.start.t)
-# for ks in key_sigs]
-
-# idx = np.searchsorted(note_info[:, 1], bar_onsets)
-# idx_key = np.searchsorted(note_info[:, 1], [x[2] for x in key_info])
-
-# key_segments = []
-# for key_notes in np.split(note_info, idx_key)[1:]:
-# key_segments.append(key_notes)
-
-# segments = []
-# for bar_notes in np.split(note_info, idx)[1:]:
-# if len(bar_notes) > 0:
-# segments.append(bar_notes)
-
-# key_gt = []
-# for ks, seg in zip(key_info, key_segments):
-# key_gt.append((fifths_to_key(ks[0], ks[1], seg), ks[2]))
-
-# # for segment in segments:
-# # print segment
-# viterbi_path = key_identification(segments, key_gt, 'temperley')
-# # print viterbi path
-# key_seq = []
-# scale_degree_sect = []
-
-# for ky, segment in zip(viterbi_path, segments):
-# # print ky,segment
-# for kych in key_gt:
-
-# try:
-# if segment[0, 1] >= kych[1]:
-# kyc = kych[0]
-
-# except:
-# pass
-
-# scale_degree_sect += [key_to_scaledegree(ky, kyc)] * len(segment)
-# key_seq += [ky] * len(segment)
-
-# W_key = np.zeros((len(note_info[:, 0]), 24))
-# W_sd = np.zeros((len(note_info[:, 0]), len(SCALE_DEGREES)))
-# for ii, ky in enumerate(zip(key_seq, scale_degree_sect)):
-# W_key[ii, KEYS.index(ky[0])] = 1
-# W_sd[ii, SCALE_DEGREES.index(ky[1])] = 1
-
-# W = np.hstack((W_key, W_sd))
-
-# return FeatureBasis(W, cls.make_full_names())
-
-# class PredictivePianorollBasis(Basis):
-
-# # If this class attribute is defined, it will be set by
-# # `lbm.utils.basisUtilities.set_derived_data_folder`, which should be called
-# # before the makeBasis method is called
-# data_folder = None
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-# onsets = np.array([n.start.t for n in scorePart.notes])
-# # Initialize matrix of basis functions
-# uox = unique_onset_idx(onsets)
-# N = len(uox)
-# W = None
-# names = None
-# if cls.data_folder is not None:
-# fn = os.path.join(cls.data_folder,
-# '{}_hidden.npy'.format(scorePart.piece_name))
-# try:
-# W_onset = np.load(fn)
-# # print(W_onset.shape, fn, len(uox))
-# W = expand_array(W_onset, uox, len(onsets))
-# if len(W) != len(onsets):
-# LOGGER.warning(('Data shape from {} does not coincide with '
-# 'the number of onsets in the score: {} vs {}')
-# .format(fn, W.shape, len(onsets)))
-# W = None
-# else:
-# names = ['feature{0:04}'.format(i) for i in range(W.shape[1])]
-# except:
-# LOGGER.warning('Could not load data from {}'.format(fn))
-
-
-# else:
-# LOGGER.warning('Cannot create PredictivePianorollBasis, because no derived data folder has been specified')
-
-# if W is None:
-# names = []
-# W = np.zeros((len(onsets), 0))
-
-# return FeatureBasis(soft_normalize(W), cls.make_full_names(names))
-
-# class HarmonicTensionBasis(Basis):
-# names = ['key', 'diameter', 'centroid']
-
-# # this should be set from outside before makeBasis is called
-# data_folder = None
-
-# @classmethod
-# def makeBasis(cls, scorePart):
-
-# onsets = np.array([n.start.t for n in scorePart.notes])
-# bars = scorePart.timeline.get_all_of_type(Measure)
-
-# # compute how many outputs Dorien code generates for this piece:
-
-# # default value used in Dorien's code
-# nvis = 4
-# ndivs = (bars[-1].end.t - bars[0].start.t)
-# ws = (ndivs / len(bars) ) / nvis
-# # print('expected nr of nlines in files', ndivs / ws)
-
-# start = bars[0].start.t
-# end = bars[-1].end.t
-
-# # Initialize matrix of basis functions
-# W = np.zeros((len(onsets), len(cls.names)))
-
-# if cls.data_folder is not None:
-# # Load harmonic tension information from Doriens
-# # XmlTensionVisualiser.jar output files
-# for i, b_name in enumerate(cls.names):
-# fn = os.path.join(cls.data_folder,
-# '{}_{}.data'.format(scorePart.piece_name, b_name))
-# try:
-# data = np.loadtxt(fn)[:, 1]
-# except:
-# LOGGER.warning('Could not load data from {}'.format(fn))
-# continue
-
-# data = np.r_[0, data, 0]
-# times = np.arange(start, end, ws)
-
-# if len(times) == len(data) - 2:
-# times = np.r_[start, times + ws / 2., end]
-# elif len(times) - 1 == len(data):
-# times = np.r_[start, times[1:], end]
-# else:
-# LOGGER.info('HarmonicTensionBasis expected {} data points from {}, got {}'
-# .format(len(times), fn, len(data)))
-# times = np.linspace(start, end, len(data))
-# W[:, i] = interp1d(times, data)(onsets)
-# else:
-# LOGGER.warning('Cannot create HarmonicTensionBasis, because no derived data folder has been specified')
-
-# return FeatureBasis(soft_normalize(W), cls.make_full_names())
diff --git a/basismixer/data.py b/basismixer/data.py
index 8cad09d..431ff5c 100644
--- a/basismixer/data.py
+++ b/basismixer/data.py
@@ -1,158 +1,186 @@
#!/usr/bin/env python
import logging
-import os
+import multiprocessing
+import warnings
+from multiprocessing import Pool
+from pathlib import Path
import numpy as np
-from torch.utils.data import Dataset, ConcatDataset
-
+import partitura.musicanalysis
from partitura import load_musicxml, load_match
-from partitura.score import expand_grace_notes, remove_grace_notes
-from basismixer.basisfunctions import make_basis
+from partitura.score import expand_grace_notes
+from torch.utils.data import Dataset
+
+from basismixer.performance_codec import get_performance_codec
from basismixer.utils import (pair_files,
get_unique_onset_idxs,
notewise_to_onsetwise)
-
-from basismixer.performance_codec import get_performance_codec
+from .parse_tsv_alignment import load_alignment_from_ASAP
LOGGER = logging.getLogger(__name__)
+from partitura.score import GraceNote, Note
-def make_datasets(model_specs, mxml_folder, match_folder, pieces=None,
- quirks=False, gracenotes='remove'):
- """Create an dataset for each in a list of model specifications.
-
- A model specification is a dictionary with the keys 'onsetwise'
- (bool), 'basis_functions' (a list of basis function names),
- 'parameter_names' (a list of parameter names) and 'seq_len' (an
- integer). For example:
- {
- 'onsetwise': False,
-        'basis_functions': ['polynomial_pitch_basis', 'duration_basis'],
- 'parameter_names': ['velocity_trend', 'beat_period'],
- 'seq_len': 1
- }
+def remove_grace_notes(part):
+ """Remove all grace notes from a timeline.
- The datasets are created based on pairs of MusicXML files and
- match files found in `mxml_folder` and `match_folder`
- respectively.
    The specified part will be modified in place.
Parameters
----------
- model_specs : list
- A list of dictionaries
- mxml_folder : str
- Path to folder with MusicXML files
- match_folder : str
- Path to folder with Match files
- pieces : list or None, optional
- If not None only pieces with a piecename occurring in the list
- are included in the datasets
- quirks : bool, optional
- If True some changes are made to make the function work with
- the Magaloff/Zeilinger datasets. Defaults to False.
-
- Returns
- -------
- list
- A list of triplets (datasets, input_names, output_names) with
- the same length as `model_specs`. `datasets` is a list of
- datasets (one per performance), `input_names` and
- `output_names` are labels to identify the inputs and outputs,
- respectively
-
- """
+    part : Part
+        The part from which to remove the grace notes
- all_targets = list(set([n for model_spec in model_specs
- for n in model_spec['parameter_names']]))
+ """
+ for gn in list(part.iter_all(GraceNote)):
+ for n in list(part.iter_all(Note)):
+ if n.tie_next == gn:
+ n.tie_next = None
+ part.remove(gn)
- perf_codec = get_performance_codec(all_targets)
- # different subsets of basis functions may be returned for different
- # pieces. idx_map maintains a global mapping from basis names to
- # indices into the columns of the model inputs.
- bf_idx_map = {}
- # a list to gather the data from which the dataset will be built.
+def process_piece(piece_performances, perf_codec, all_basis_functions, gracenotes, dataset_name):
+ piece, performances = piece_performances
data = []
-
- all_basis_functions = set([n for model_spec in model_specs
- for n in model_spec['basis_functions']])
- folders = dict(mxml=mxml_folder, match=match_folder)
-
- # by_prefix should be used when there are multiple performances
- # (assuming the matchfile names consist of the piece name + a
- # suffix). When there is only a single performance per piece (like for
- # magaloff/zeilinger), we assume musicxml and matchfile have the same
-    # name (up to the file extension), so we switch by_prefix off in the
-    # file pairing. In that way files are only paired if they have
-    # identical names (up to the extension).
- for piece, files in pair_files(folders, by_prefix=not quirks).items():
- if pieces is not None and piece not in pieces:
- continue
- # load the score
- xml_fn = files['mxml'].pop()
- LOGGER.info('Processing {}'.format(xml_fn))
- part = load_musicxml(xml_fn)
- bm = part.beat_map
-
- # get indices of the unique onsets
- if gracenotes == 'remove':
- # Remove grace notes
- remove_grace_notes(part)
+ quirks = False
+ if dataset_name == 'asap':
+ name = '/'.join(str(piece).split('asap')[1].split('/')[1:-1])
+ else:
+ name = piece.split('/')[-1].split('.')[0]
+        # quirks = True
+
+ LOGGER.info('Processing {}'.format(piece))
+
+ part = load_musicxml(piece)
+ part = partitura.score.merge_parts(part)
+ part = partitura.score.unfold_part_maximal(part, update_ids=dataset_name != '4x22')
+ bm = part.beat_map
+
+ # get indices of the unique onsets
+ if gracenotes == 'remove':
+ # Remove grace notes
+ remove_grace_notes(part)
+ else:
+ # expand grace note durations (necessary for correct computation of
+ # targets)
+ expand_grace_notes(part)
+ basis, bf_names = partitura.musicanalysis.make_note_feats(part, list(all_basis_functions))
+
+ nid_dict = dict((n.id, i) for i, n in enumerate(part.notes_tied))
+
+ for performance in performances:
+ if dataset_name == 'asap':
+ alignment = load_alignment_from_ASAP(performance)
+ ppart = partitura.load_performance_midi(str(performance).split("_note_alignments/")[0] + ".mid")
else:
- # expand grace note durations (necessary for correct computation of
- # targets)
- expand_grace_notes(part)
+ ppart, alignment = load_match(performance, first_note_at_zero=True)
+
+        # TODO: check whether the Magaloff quirks are still needed:
+        # if quirks:
+        #     for n in alignment:
+        #         if n['label'] == 'match':
+        #             n['score_id'] = n['score_id'].split('-')[0]
+
+ assert len(ppart.performedparts) == 1
+ ppart = ppart.performedparts[0]
+
+ # compute the targets
+ targets, snote_ids = perf_codec.encode(part, ppart, alignment)
+
+ matched_subset_idxs = np.array([nid_dict[nid] for nid in snote_ids])
+ basis_matched = basis[matched_subset_idxs]
+
+ score_onsets = bm([n.start.t for n in part.notes_tied])[matched_subset_idxs]
+ unique_onset_idxs = get_unique_onset_idxs(score_onsets)
+
+ i = -2 if dataset_name == 'asap' else -1
+
+ performance_name = str(performance).split('/')[i]
- # compute the basis functions
- basis, bf_names = make_basis(part, all_basis_functions)
+ data.append((basis_matched, bf_names, targets, unique_onset_idxs, name, performance_name))
+ return data
- # map the basis names returned for this piece to their global
- # indices
- bf_idx = np.array([bf_idx_map.setdefault(name, len(bf_idx_map))
- for i, name in enumerate(bf_names)])
+class ProcessPiece:
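+    """Picklable callable wrapping process_piece with fixed extra
+    arguments, so that it can be passed to multiprocessing.Pool.map
+    (a lambda or closure could not be pickled)."""
+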
+ def __init__(self, args):
+ self.args = args
- # a dictionary from note id to index. We need this to select the
- # subset of rows in the `basis` array that have a matching entry in
- # the targets.
- nid_dict = dict((n.id, i) for i, n in enumerate(part.notes_tied))
+ def __call__(self, piece):
+ return process_piece(piece, *self.args)
- for match in files['match']:
- # if not '_p01' in match:
- # continue
- name = os.path.splitext(os.path.basename(match))[0]
-
- LOGGER.info('Processing {}'.format(match))
+def filter_blocklist(pieces):
+ blocklist = ['Liszt/Sonata', ]
+    pieces_filtered = [p for p in pieces
+                       if not any(b in str(p) for b in blocklist)]
+ print(f"filtered out {len(pieces) - len(pieces_filtered)} pieces!")
+ return pieces_filtered
- # load the performed part and the alignment from the match file
- ppart, alignment = load_match(match, first_note_at_zero=True)
- if quirks:
- for n in alignment:
- if n['label'] == 'match':
- n['score_id'] = n['score_id'].split('-')[0]
+def make_datasets(model_specs, root_folder, dataset_name, gracenotes='remove', processes=0):
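+    """Create a dataset for each model specification.
+
+    A model specification is a dict with the keys 'onsetwise' (bool),
+    'basis_functions' (a list of basis function names),
+    'parameter_names' (a list of parameter names) and 'seq_len' (an
+    integer). `dataset_name` selects the corpus layout: '4x22',
+    'magaloff' or 'asap'.
+    """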
+ assert dataset_name in ['4x22', 'magaloff', 'asap']
- # compute the targets
- targets, snote_ids = perf_codec.encode(part, ppart, alignment)
-
+ quirks = dataset_name == 'magaloff'
- matched_subset_idxs = np.array([nid_dict[nid] for nid in snote_ids])
- basis_matched = basis[matched_subset_idxs]
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ all_targets = list(set([n for model_spec in model_specs
+ for n in model_spec['parameter_names']]))
- score_onsets = bm([n.start.t for n in part.notes_tied])[matched_subset_idxs]
- unique_onset_idxs = get_unique_onset_idxs(score_onsets)
+ perf_codec = get_performance_codec(all_targets)
- data.append((basis_matched, bf_idx, targets, unique_onset_idxs, name))
-
- return piece_data_to_datasets(data, bf_idx_map, model_specs)
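+    # different subsets of basis functions may be returned for different
+    # pieces; bf_idx_map maintains a global mapping from basis names to
+    # indices into the columns of the model inputs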
+ bf_idx_map = {}
+
+ all_basis_functions = set([n for model_spec in model_specs
+ for n in model_spec['basis_functions']])
+
+    if dataset_name == 'asap':  # TODO: fix loading of Liszt/Sonata
+ assert 'asap' in root_folder.split('/')[-1], 'Root folder name must contain "asap"'
+ pieces = list(Path(root_folder).rglob("*/xml_score.musicxml"))
+ pieces = filter_blocklist(pieces)
+ performances = [list(Path(piece).parent.glob("*_note_alignments/note_alignment.tsv")) for piece in pieces]
+ piece_performances = zip(pieces, performances)
+ else:
+ mxml_folder = root_folder + ('xml' if dataset_name == 'magaloff' else 'musicxml')
+ match_folder = root_folder + 'match'
+ folders = dict(mxml=mxml_folder, match=match_folder)
+ paired_files = pair_files(folders, by_prefix=not quirks)
+        piece_performances = []
+ for pf in paired_files.items():
+            if 'chopin_op35_Mv3' in pf[0]:  # TODO: repair loading, do not filter...
+ continue
+ piece_performances.append((list(pf[1]['mxml'])[0], list(pf[1]['match'])))
+
+ if processes <= 0:
+ processes = multiprocessing.cpu_count()
+
+ if processes > 1:
+ pool = Pool(processes)
+ pieces = list(pool.map(ProcessPiece((perf_codec, all_basis_functions, gracenotes, dataset_name)), piece_performances))
+ else:
+ pieces = [process_piece(p, perf_codec, all_basis_functions, gracenotes, dataset_name) for p in piece_performances]
+ pieces = [list(i) for sublist in pieces for i in sublist]
+
+ for piece in pieces:
+ bf_idx = np.array([bf_idx_map.setdefault(name, len(bf_idx_map))
+ for i, name in enumerate(piece[1])])
+ piece[1] = bf_idx
+
+ data = [tuple(l) for l in pieces]
+
+ return piece_data_to_datasets(data, bf_idx_map, model_specs)
def piece_data_to_datasets(data, bf_idx_map, model_specs):
# total number of basis functions in the dataset
- #n_basis = len(bf_idx_map)
+ # n_basis = len(bf_idx_map)
idx_bf_map = dict((v, k) for k, v in bf_idx_map.items())
# print(bf_idx_map)
# print(bf_idx_inv_map)
@@ -165,7 +193,7 @@ def piece_data_to_datasets(data, bf_idx_map, model_specs):
output_names_per_model = []
for m_spec in model_specs:
# the global indices of the basis functions that this model needs
- model_idx = np.concatenate([np.where(input_basis==n)[0]
+ model_idx = np.concatenate([np.where(input_basis == n)[0]
for n in m_spec['basis_functions']])
# trg_idx = np.array([perf_codec.parameter_names.index(n) for n in m_spec['targets']])
n_basis = len(model_idx)
@@ -176,33 +204,32 @@ def piece_data_to_datasets(data, bf_idx_map, model_specs):
m_datasets = []
m_input_names = []
- for bf, idx, targets, uox, name in data:
+ for bf, idx, targets, uox, name, perf_name in data:
# idx: the global indices that this piece has
# the subset of basisfunctions that this model is interested in:
useful = np.isin(idx, model_idx)
# idx mapped to the subset of basisfunctions for this model
- model_idx_subset = np.array([np.where(model_idx==i)[0][0]
+ model_idx_subset = np.array([np.where(model_idx == i)[0][0]
for i in idx[useful]])
# select only the required bfs
bf = bf[:, useful]
# select only the required targets
targets = np.array([targets[n] for n in m_spec['parameter_names']]).T
-
- if m_spec['onsetwise']:
+ if m_spec['onsetwise']:
bf = notewise_to_onsetwise(bf, uox)
targets = notewise_to_onsetwise(targets, uox)
-
- ds = BasisMixerDataSet(bf, model_idx_subset, n_basis,
- targets, m_spec['seq_len'], name)
-
- m_datasets.append(ds)
+ ds = BasisMixerDataSet(bf, model_idx_subset, n_basis, targets,
+ input_names_per_model[-1], output_names_per_model[-1],
+ m_spec['seq_len'], name, perf_name)
+
+ m_datasets.append(ds)
dataset_per_model.append(m_datasets)
-
+
return zip(dataset_per_model, input_names_per_model, output_names_per_model)
@@ -260,19 +287,22 @@ class BasisMixerDataSet(Dataset):
See Parameters Section.
"""
- def __init__(self, basis, idx, n_basis, targets, seq_len=1, name=None):
+
+ def __init__(self, basis, idx, n_basis, targets, input_names, output_names, seq_len=1, name=None, perf_name=None):
self.basis = basis
self.idx = idx
self.n_basis = n_basis
self.targets = targets
self.seq_len = seq_len
self.name = name
+ self.perf_name = perf_name
+ self.input_names = input_names
+ self.output_names = output_names
@property
def piecewise(self):
return self.seq_len == -1
-
def __getitem__(self, i):
if self.piecewise:
return self._get_item_piecewise(i)
diff --git a/basismixer/helper/__init__.py b/basismixer/helper/__init__.py
new file mode 100644
index 0000000..a1e402a
--- /dev/null
+++ b/basismixer/helper/__init__.py
@@ -0,0 +1,4 @@
+from .visualization import performance_player, show_score
+from .data import init_dataset
+from .rendering import load_model, compute_basis_from_xml, render_midi
+from .plotting import plot_basis
diff --git a/basismixer/helper/data.py b/basismixer/helper/data.py
new file mode 100644
index 0000000..59b0b9e
--- /dev/null
+++ b/basismixer/helper/data.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+
+import os
+import json
+import argparse
+import tarfile
+import io
+from urllib.request import urlopen
+import urllib
+import re
+import warnings
+
+from IPython.display import display, HTML, Audio, update_display
+import ipywidgets as widgets
+import appdirs
+
+from basismixer.utils import pair_files
+
+TIMEOUT = 2
+REPO_NAME = 'vienna4x22_rematched'
+DATASET_BRANCH = 'master'
+OWNER = 'OFAI'
+DATASET_URL = 'https://api.github.com/repos/{}/{}/tarball/{}'.format(OWNER, REPO_NAME, DATASET_BRANCH)
+
+# oggs will be downloaded from here
+OGG_URL_BASE = 'https://spocs.duckdns.org/vienna_4x22/'
+
+TMP_DIR = appdirs.user_cache_dir('basismixer')
+CFG_FILE = os.path.join(TMP_DIR, 'cache.json')
+CFG = None
+# DATASET_DIR will be set to the path of our data
+DATASET_DIR = None
+PIECES = ()
+PERFORMERS = ()
+SCORE_PERFORMANCE_PAIRS = None
+
+def load_cfg():
+ global CFG
+ if os.path.exists(CFG_FILE):
+ with open(CFG_FILE) as f:
+ CFG = json.load(f)
+ else:
+ CFG = {'last_dataset_dir': None}
+
+def save_cfg():
+ with open(CFG_FILE, 'w') as f:
+ json.dump(CFG, f)
+
+def get_datasetdir():
+ """Get the SHA of the latest commit and return the corresponding
+ datast directory path.
+
+ """
+ commit_url = ('https://api.github.com/repos/{}/{}/commits/{}'
+ .format(OWNER, REPO_NAME, DATASET_BRANCH))
+ try:
+
+ with urlopen(commit_url, timeout=TIMEOUT) as response:
+ commit = json.load(response)
+ repo_dirname = '{}-{}-{}'.format(OWNER, REPO_NAME, commit['sha'][:7])
+ return os.path.join(TMP_DIR, repo_dirname)
+
+ except urllib.error.URLError as e:
+ # warnings.warn('{} (url: {})'.format(e, commit_url))
+ return CFG.get('last_dataset_dir', None)
+ except Exception as e:
+ # warnings.warn('{} (url: {})'.format(e, commit_url))
+ return CFG.get('last_dataset_dir', None)
+
+
+def init_dataset():
+ global DATASET_DIR, PIECES, PERFORMERS, SCORE_PERFORMANCE_PAIRS
+
+ load_cfg()
+
+ status = widgets.Output()
+ display(status)
+ status.clear_output()
+
+ DATASET_DIR = get_datasetdir()
+
+ if DATASET_DIR is None:
+ status.append_stdout('No internet connection?\n')
+
+ elif os.path.exists(DATASET_DIR):
+
+ status.append_stdout('Vienna 4x22 Corpus already downloaded.\n')
+ status.append_stdout('Data is in {}'.format(DATASET_DIR))
+
+ else:
+ status.append_stdout('Downloading Vienna 4x22 Corpus...')
+ try:
+ try:
+ urldata = urlopen(DATASET_URL).read()
+ except urllib.error.URLError as e:
+ # warnings.warn('{} (url: {})'.format(e, DATASET_URL))
+ status.append_stdout('error. No internet connection?\n')
+ return
+
+ with tarfile.open(fileobj=io.BytesIO(urldata)) as archive:
+ folder = next(iter(archive.getnames()), None)
+ archive.extractall(TMP_DIR)
+ if folder:
+ DATASET_DIR = os.path.join(TMP_DIR, folder)
+ CFG['last_dataset_dir'] = DATASET_DIR
+ save_cfg()
+ # assert DATASET_DIR == os.path.join(TMP_DIR, folder)
+
+ except Exception as e:
+ status.append_stdout('\nError: {}'.format(e))
+ return None
+ status.append_stdout('done\nData is in {}'.format(DATASET_DIR))
+
+ if DATASET_DIR is None:
+ return None
+
+ folders = dict(musicxml=os.path.join(DATASET_DIR, 'musicxml'),
+ match=os.path.join(DATASET_DIR, 'match'))
+
+ SCORE_PERFORMANCE_PAIRS = []
+ paired_files = pair_files(folders)
+ pieces = sorted(paired_files.keys())
+ for piece in pieces:
+ xml_fn = paired_files[piece]['musicxml'].pop()
+ for match_fn in sorted(paired_files[piece]['match']):
+ SCORE_PERFORMANCE_PAIRS.append((xml_fn, match_fn))
+
+ fn_pat = re.compile(r'(.*)_(p[0-9][0-9])\.match')
+ match_files = os.listdir(os.path.join(DATASET_DIR, 'match'))
+ pieces, performers = zip(*[m.groups() for m in [fn_pat.match(fn)
+ for fn in match_files]
+ if m])
+ PIECES = sorted(set(pieces))
+ PERFORMERS = sorted(set(performers))
+
+
+if __name__ == '__main__':
+ init_dataset()
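
A minimal usage sketch for the helper above, assuming it runs in a Jupyter notebook (the ipywidgets output requires a live kernel). After init_dataset() returns, the module-level globals describe the downloaded corpus:

    from basismixer.helper import data

    data.init_dataset()                   # downloads/caches the Vienna 4x22 corpus on first use
    print(data.DATASET_DIR)               # cache location chosen via appdirs
    print(data.PIECES, data.PERFORMERS)   # ids parsed from the match filenames
    xml_fn, match_fn = data.SCORE_PERFORMANCE_PAIRS[0]  # one (score, performance) pair
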
diff --git a/basismixer/helper/plotting.py b/basismixer/helper/plotting.py
new file mode 100644
index 0000000..9119e9e
--- /dev/null
+++ b/basismixer/helper/plotting.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+
+import numpy as np
+# import argparse
+import matplotlib.pyplot as plt
+from basismixer.helper.visualization import make_plot
+
+def plot_basis(basis, names, onsets=None, title=None):
+ n_basis = basis.shape[1]
+
+ if onsets is None:
+ x = np.arange(len(basis))
+ else:
+ x = onsets
+
+ w = len(x)/30
+ h = n_basis
+
+ fig, axs = plt.subplots(n_basis, sharex=True,
+ gridspec_kw={'hspace': 0})
+ if n_basis == 1:
+ axs = [axs]
+
+ fig.set_size_inches(w, h)
+
+ if title:
+ fig.suptitle(title)
+
+ for i, name in enumerate(names):
+ axs[i].fill_between(x, 0, basis[:, i], label=name)
+ axs[i].legend(frameon=False, loc='upper left')
+
+ fig.tight_layout()
+
+ if title:
+ fig.subplots_adjust(top=0.95)
+
+ # fig.savefig(out_fn)
+
+def plot_predictions_and_targets(predictions, targets):
+ param_names = predictions.dtype.names
+ n_params = len(param_names)
+ fig, axs = plt.subplots(n_params, sharex=True)
+
+ fig.set_size_inches(len(predictions) / 30, n_params)
+ for i, pn in enumerate(param_names):
+ axs[i].plot(predictions[pn], color='firebrick',
+ label='predictions')
+ if targets is not None:
+ axs[i].plot(targets[:, i], color='blue', label='targets')
+ axs[i].set_title(pn)
+ axs[i].legend(frameon=False, loc='upper left')
+
+ fig.tight_layout()
+
+
+def plot_predictions(predictions, onsets=None,
+ param_names=None):
+
+ if param_names is None:
+ param_names = predictions.dtype.names
+ fig, axs = plt.subplots(len(param_names),
+ sharex=True,
+ gridspec_kw={'hspace': 0.15})
+ plt.subplots_adjust(left=0.07, right=0.99, top=.99, bottom=0.1)
+
+ make_plot(fig, axs, predictions[param_names], onsets)
+
+
+
+
+# def main():
+# parser = argparse.ArgumentParser(description="Do something")
+# parser.add_argument("file", help="some file")
+# args = parser.parse_args()
+
+
+# if __name__ == '__main__':
+# main()
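
A quick smoke test for plot_basis, with random numbers standing in for real basis functions (shapes inferred from the function body: one row per note, one column per basis):

    import numpy as np
    import matplotlib.pyplot as plt
    from basismixer.helper.plotting import plot_basis

    basis = np.random.rand(120, 3)                  # N notes x M basis functions
    names = ['pitch', 'duration', 'metrical']       # one label per column
    plot_basis(basis, names, title='random basis')  # one stacked subplot per column
    plt.show()
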
diff --git a/basismixer/helper/predictions.py b/basismixer/helper/predictions.py
new file mode 100644
index 0000000..9d7c96d
--- /dev/null
+++ b/basismixer/helper/predictions.py
@@ -0,0 +1,133 @@
+import json
+import logging
+import os
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader, ConcatDataset
+# from torch.utils.data.sampler import SubsetRandomSampler
+
+from partitura.utils import partition
+from basismixer.predictive_models import (construct_model as c_model,
+ SupervisedTrainer,
+ MSELoss)
+from basismixer.utils import load_pyc_bz, save_pyc_bz
+
+logging.basicConfig(level=logging.INFO)
+LOGGER = logging.getLogger(__name__)
+
+RNG = np.random.RandomState(1984)
+
+def construct_model(config, in_names, out_names, out_dir):
+ model_cfg = config['model'].copy()
+ model_cfg['args']['input_names'] = in_names
+ model_cfg['args']['input_size'] = len(in_names)
+ model_cfg['args']['output_names'] = out_names
+ model_cfg['args']['output_size'] = len(out_names)
+ model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise'
+ model_name = ('-'.join(out_names) +
+ '-' + ('onsetwise' if config['onsetwise'] else 'notewise'))
+ model_out_dir = os.path.join(out_dir, model_name)
+ if not os.path.exists(model_out_dir):
+ os.mkdir(model_out_dir)
+ # save model config for later saving model
+ config_out = os.path.join(model_out_dir, 'config.json')
+ LOGGER.info('Saving config in {0}'.format(config_out))
+ json.dump(jsonize_dict(model_cfg),
+ open(config_out, 'w'),
+ indent=2)
+ model = c_model(model_cfg)
+
+ return model, model_out_dir
+
+def setup_output_directory(out_dir='/tmp/trained_models'):
+ if not os.path.exists(out_dir):
+ os.mkdir(out_dir)
+ return out_dir
+
+def jsonize_dict(input_dict):
+ out_dict = dict()
+ for k, v in input_dict.items():
+ if isinstance(v, np.ndarray):
+ out_dict[k] = v.tolist()
+ elif isinstance(v, dict):
+ out_dict[k] = jsonize_dict(v)
+ else:
+ out_dict[k] = v
+ return out_dict
+
+def split_datasets_by_piece(datasets, test_size=0.2, valid_size=0.2):
+
+ by_piece = partition(lambda d: '_'.join(d.name.split('_')[:-1]), datasets)
+ pieces = list(by_piece.keys())
+ RNG.shuffle(pieces)
+
+ n_test = max(1, int(np.round(test_size*len(pieces))))
+ n_valid = max(1, int(np.round(valid_size*len(pieces))))
+ n_train = len(pieces) - n_test - n_valid
+
+ if n_train < 1:
+ raise Exception('Not enough pieces to split datasets according '
+ 'to the specified test/validation proportions')
+
+ test_pieces = pieces[:n_test]
+ valid_pieces = pieces[n_test:n_test+n_valid]
+ train_pieces = pieces[n_test+n_valid:]
+
+ test_set = [d for pd in [by_piece[p] for p in test_pieces] for d in pd]
+ valid_set = [d for pd in [by_piece[p] for p in valid_pieces] for d in pd]
+ train_set = [d for pd in [by_piece[p] for p in train_pieces] for d in pd]
+
+ return (ConcatDataset(train_set),
+ ConcatDataset(valid_set),
+ ConcatDataset(test_set))
+
+
+def split_datasets(datasets, test_size=0.2, valid_size=0.2):
+
+ n_pieces = len(datasets)
+
+ dataset_idx = np.arange(n_pieces)
+ RNG.shuffle(dataset_idx)
+ len_test = int(n_pieces * test_size)
+ len_valid = np.maximum(int((n_pieces - len_test) * valid_size), 1)
+
+ test_idxs = dataset_idx[:len_test]
+ valid_idxs = dataset_idx[len_test:len_test + len_valid]
+ train_idxs = dataset_idx[len_test + len_valid:]
+
+ return (ConcatDataset([datasets[i] for i in train_idxs]),
+ ConcatDataset([datasets[i] for i in valid_idxs]),
+ ConcatDataset([datasets[i] for i in test_idxs]))
+
+
+
+
+def train_model(model, train_set, valid_set,
+ config, out_dir):
+ batch_size = config['train_args'].pop('batch_size')
+
+ #### Create train and validation data loaders #####
+ train_loader = DataLoader(train_set,
+ batch_size=batch_size,
+ shuffle=True)
+ valid_loader = DataLoader(valid_set,
+ batch_size=batch_size,
+ shuffle=False)
+
+ loss = MSELoss()
+
+ ### Construct the optimizer ####
+ optim_name, optim_args = config['train_args']['optimizer']
+ optim = getattr(torch.optim, optim_name)
+ config['train_args']['optimizer'] = optim(model.parameters(), **optim_args)
+ train_args = config['train_args']
+ train_args.pop('seq_len', None)
+ trainer = SupervisedTrainer(model=model,
+ train_loss=loss,
+ valid_loss=loss,
+ train_dataloader=train_loader,
+ valid_dataloader=valid_loader,
+ out_dir=out_dir,
+ **config['train_args'])
+ trainer.train()
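
A sketch of the piece-wise splitter above: it groups by everything before the final underscore in each dataset's name, so all performances of a piece land on the same side of the split. Dummy is a hypothetical stand-in for BasisMixerDataSet; only .name, __len__ and __getitem__ are needed here:

    from basismixer.helper.predictions import split_datasets_by_piece

    class Dummy:
        def __init__(self, name):
            self.name = name
        def __len__(self):
            return 10           # ConcatDataset needs a length
        def __getitem__(self, i):
            return i

    dsets = [Dummy('piece{}_p{:02d}'.format(i, j))
             for i in range(5) for j in range(4)]
    train, valid, test = split_datasets_by_piece(dsets, test_size=0.2, valid_size=0.2)
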
diff --git a/basismixer/helper/rendering.py b/basismixer/helper/rendering.py
new file mode 100644
index 0000000..52563eb
--- /dev/null
+++ b/basismixer/helper/rendering.py
@@ -0,0 +1,140 @@
+import json
+import os
+
+import torch
+import numpy as np
+import subprocess
+import soundfile
+import tempfile
+import logging
+
+from IPython.display import display, Audio
+
+from partitura import save_performance_midi, load_musicxml, load_score_midi
+from partitura.score import expand_grace_notes, unfold_part_maximal
+from basismixer.predictive_models import FullPredictiveModel, construct_model
+from basismixer.performance_codec import get_performance_codec
+from partitura.musicanalysis import make_note_feats
+
+from basismixer.helper.predictions import setup_output_directory
+
+LOGGER = logging.getLogger(__name__)
+
+def path_to_trained_models(path=setup_output_directory()):
+ if not os.path.exists(path):
+ print('Models not found! Using sample models')
+ path = './sample_data/models'
+ return path
+
+
+def render_midi(midi_fn):
+
+ with tempfile.NamedTemporaryFile() as out_file:
+ cmd = ['timidity', '-E', 'F', 'reverb=0', 'F', 'chorus=0',
+ '--output-mono', '-Ov', '-o', out_file.name, midi_fn]
+ try:
+ ps = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if ps.returncode != 0:
+ LOGGER.error('Command {} failed with code {} (stderr: {})'
+ .format(cmd, ps.returncode, ps.stderr.decode('UTF8')))
+ return False
+ except FileNotFoundError as f:
+ LOGGER.error('Executing "{}" returned {}.'
+ .format(' '.join(cmd), f))
+ return False
+ data, fs = soundfile.read(out_file.name)
+ aw = display(Audio(data=data, rate=fs, autoplay=True), display_id=True)
+ return aw
+
+
+def load_model(models_dir):
+ models = []
+ for f in os.listdir(models_dir):
+ path = os.path.join(models_dir, f)
+ if os.path.isdir(path):
+ model_config = json.load(open(os.path.join(path, 'config.json')))
+ params = torch.load(os.path.join(path, 'best_model.pth'),
+ map_location=torch.device('cpu'))['state_dict']
+
+ model = construct_model(model_config, params)
+ models.append(model)
+
+
+
+ output_names = list(set([name for out_name in [m.output_names for m in models] for name in out_name]))
+ input_names = list(set([name for in_name in [m.input_names for m in models] for name in in_name]))
+ input_names.sort()
+ output_names.sort()
+
+ default_values = dict(
+ velocity_trend=64,
+ velocity_dev=0,
+ beat_period_standardized=0,
+ timing=0,
+ articulation_log=0,
+ beat_period_mean=0.5,
+ beat_period_std=0.1)
+ all_output_names = list(default_values.keys())
+ full_model = FullPredictiveModel(models, input_names,
+ all_output_names, default_values)
+
+ not_in_model_names = set(all_output_names).difference(output_names)
+
+ print('Trained models include the following parameters:\n'
+ + '\n'.join(output_names) + '\n\n'
+ 'The following parameters will use default values:\n'
+ + '\n'.join(['{0}: {1:.2f}'.format(k, default_values[k])
+ for k in not_in_model_names]))
+
+
+ return full_model, output_names
+
+def sanitize_performed_part(ppart):
+ """Avoid negative durations in notes.
+
+ """
+ for n in ppart.notes:
+
+ if n['note_off'] < n['note_on']:
+ n['note_off'] = n['note_on']
+
+ if n['sound_off'] < n['note_off']:
+ n['sound_off'] = n['note_off']
+
+
+def post_process_predictions(predictions):
+ max_articulation = 1.5
+ max_bps = 1
+ max_timing = 0.2
+ predictions['articulation_log'] = np.clip(predictions['articulation_log'],
+ -max_articulation, max_articulation)
+ predictions['velocity_dev'] = np.clip(predictions['velocity_dev'], 0, 0.8)
+ predictions['beat_period_standardized'] = np.clip(predictions['beat_period_standardized'],
+ -max_bps, max_bps)
+ predictions['timing'] = np.clip(predictions['timing'],
+ -max_timing, max_timing)
+ predictions['velocity_trend'][predictions['velocity_trend'] > 0.8] = 0.8
+
+
+
+
+def compute_basis_from_xml(xml_fn, input_names):
+ # Load MusicXML file
+ part = load_musicxml(xml_fn, force_note_ids=True)
+ assert len(part.parts) == 1
+ part = part.parts[0]
+ expand_grace_notes(part)
+ part = unfold_part_maximal(part)
+
+ # Compute basis functions
+ _basis, bf_names = make_note_feats(part, list(set([bf.split('.')[0] for bf in input_names])))
+ basis = np.zeros((len(_basis), len(input_names)))
+ for i, n in enumerate(input_names):
+ try:
+ ix = bf_names.index(n)
+ except ValueError:
+ continue
+ basis[:, i] = _basis[:, ix]
+
+ return basis, part
+
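
Putting the helpers above together, a hedged end-to-end rendering sketch (paths hypothetical; it assumes the full model exposes input_names/output_names and a predict(basis, onsets) method, as used in bin/cross_validate_model.py below):

    import numpy as np
    from partitura import save_performance_midi
    from basismixer.performance_codec import get_performance_codec
    from basismixer.helper.rendering import (load_model, compute_basis_from_xml,
                                             post_process_predictions, render_midi)

    model, output_names = load_model('/tmp/trained_models')   # hypothetical model dir
    basis, part = compute_basis_from_xml('score.musicxml', model.input_names)
    onsets = np.array([n.start.t for n in part.notes_tied])
    preds = model.predict(basis, onsets)
    post_process_predictions(preds)                           # clip to sane ranges
    ppart = get_performance_codec(model.output_names).decode(part, preds)
    save_performance_midi(ppart, '/tmp/rendered.mid')
    render_midi('/tmp/rendered.mid')                          # needs timidity installed
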
diff --git a/basismixer/helper/visualization.py b/basismixer/helper/visualization.py
new file mode 100644
index 0000000..3948eb7
--- /dev/null
+++ b/basismixer/helper/visualization.py
@@ -0,0 +1,310 @@
+#!/usr/bin/env ipython
+
+import threading
+from functools import partial
+import time
+import io
+import os
+import logging
+from urllib.request import urlopen
+
+from IPython.display import display, HTML, Audio, update_display, Image
+# from ipywidgets import interact, interactive, fixed
+import ipywidgets as widgets
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.interpolate import interp1d
+import soundfile
+
+import partitura
+# import partitura.score as score
+from partitura.utils import partition
+import basismixer
+from basismixer.helper import data
+import basismixer.performance_codec as pc
+
+LOGGER = logging.getLogger(__name__)
+
+OGG_URL_BASE = 'https://basismixer.cp.jku.at/static/vienna4x22/'
+# PERF_CODEC = pc.PerformanceCodec(pc.TimeCodec(), pc.NotewiseDynamicsCodec())
+PERF_CODEC = pc.PerformanceCodec(pc.TimeCodec(normalization='beat_period_standardized'),
+ pc.OnsetwiseDecompositionDynamicsCodec())
+
+plt.rcParams.update({'font.size': 8})
+
+def show_score(piece):
+ if isinstance(piece, int):
+ piece = data.PIECES[piece]
+ display(Image(os.path.join(data.DATASET_DIR, 'png', '{}.png'.format(piece))))
+
+
+def load_performance_audio(piece, performer):
+ url = '{}{}_{}.ogg'.format(OGG_URL_BASE, piece, performer)
+ try:
+ audio, fs = soundfile.read(io.BytesIO(urlopen(url).read()), always_2d=True)
+ audio = audio.mean(1)
+ return audio, fs
+ except Exception:
+ return None, None
+
+
+def get_performance_info(piece, performer):
+ assert data.DATASET_DIR
+ musicxml_fn = os.path.join(data.DATASET_DIR, 'musicxml', '{}.musicxml'.format(piece))
+ match_fn = os.path.join(data.DATASET_DIR, 'match', '{}_{}.match'.format(piece, performer))
+
+ part = partitura.load_musicxml(musicxml_fn)
+
+ ppart, alignment = partitura.load_match(match_fn, first_note_at_zero=True)
+ return part, ppart, alignment
+
+
+def show_performance(piece, performer, fig, axs, keep_zoom):
+ part, ppart, alignment = get_performance_info(piece, performer)
+ targets, snote_ids = PERF_CODEC.encode(part, ppart, alignment)
+
+ # we convert to f8 to avoid numerical problems when computing means
+ dtype = [(n, 'f8') for n in targets.dtype.names]
+ targets = targets.astype(dtype)
+
+ part_by_id = dict((n.id, n) for n in part.notes_tied)
+ ppart_by_id = dict((n['id'], n) for n in ppart.notes)
+ s_to_p_id = dict((a['score_id'], a['performance_id'])
+ for a in alignment if a['label'] == 'match')
+ s_notes = [part_by_id[n] for n in snote_ids]
+ p_notes = [ppart_by_id[s_to_p_id[n]] for n in snote_ids]
+
+ bm = part.beat_map
+ s_onsets = bm([n.start.t for n in s_notes])
+ p_onsets = np.array([n['note_on'] for n in p_notes])
+ measure_times = np.array([(m.start.t, '{}'.format(m.number)) for m in
+ part.iter_all(partitura.score.Measure)],
+ dtype=[('t', 'f4'), ('label', 'U100')])
+
+ measure_times['t'] = bm(measure_times['t'])
+
+ make_plot(fig, axs, targets, onsets=s_onsets, xlabel='Measure number',
+ xticks=measure_times, keep_zoom=keep_zoom) # , title='{} {}'.format(piece, performer))
+
+ s_times = np.r_[s_onsets, s_notes[-1].end.t]
+ p_times = np.r_[p_onsets, p_notes[-1]['note_off']]
+ # score_perf_map = interp1d(s_onsets, p_onsets, bounds_error=False, fill_value='extrapolate')
+ score_perf_map = interp1d(s_times, p_times, bounds_error=False, fill_value=(p_times[0], p_times[-1]))
+
+ return score_perf_map
+
+
+def make_plot(fig, axs, targets, onsets=None, xticks=None, title=None,
+ xlabel=None, start=None, end=None, keep_zoom=False):
+ names = targets.dtype.names
+
+ xlims = []
+ ylims = []
+ for ax in axs:
+ if keep_zoom:
+ xlims.append(list(ax.get_xlim()))
+ ylims.append(list(ax.get_ylim()))
+ ax.clear()
+
+ n_targets = len(names)
+
+ if onsets is None:
+ x = np.arange(len(targets))
+ else:
+ x = onsets
+
+ w = len(x)/30
+ h = n_targets
+
+ if end is not None:
+ idx = x < end
+ x = x[idx]
+ targets = targets[idx]
+
+ if start is not None:
+ idx = x >= start
+ x = x[idx]
+ targets = targets[idx]
+
+ if n_targets == 1:
+ axs = [axs]
+
+ # fig.set_size_inches(w, h)
+
+ if title:
+ fig.suptitle(title)
+
+ by_onset = partition(lambda ix: ix[1], enumerate(x))
+ for k, v in by_onset.items():
+ by_onset[k] = np.array([i for i, _ in v])
+
+ for i, name in enumerate(names):
+ target = targets[name]
+ target[np.isnan(target)] = 0
+
+ axs[i].plot(x, target, '.', label=name)
+
+ if xticks is not None:
+ axs[i].set_xticks(xticks['t'])
+ axs[i].set_xticklabels(xticks['label'])
+ axs[i].xaxis.grid()
+
+ tt = []
+ vv = []
+ for t, v in by_onset.items():
+ tt.append(t)
+ vv.append(np.mean(target[v]))
+
+ # axs[i].plot(tt, vv, label='{} (mean)'.format(name))
+ axs[i].plot(tt, vv)
+
+ axs[i].legend(frameon=False, loc=2)
+
+ if keep_zoom:
+ axs[0].set_xlim(xlims[0])
+ for xlim, ylim, ax in zip(xlims, ylims, axs):
+ ax.set_ylim(ylim)
+
+ return fig, axs
+
+
+def performance_player():
+ status = widgets.Output()
+ piece_dd = widgets.Dropdown(options=data.PIECES, description='Piece:')
+ performer_dd = widgets.Dropdown(options=data.PERFORMERS, description='Performer:')
+ keep_lims_chbox = widgets.Checkbox(value=False, description='Keep zoom')
+ reset_lims = widgets.Button(description='Zoom to fit',
+ button_style='', # 'success', 'info', 'warning', 'danger' or ''
+ tooltip='Zoom to fit',
+ icon='check'
+ )
+
+
+ if data.PIECES and data.PERFORMERS:
+ current_performance = [data.PIECES[0], data.PERFORMERS[0]]
+ else:
+ current_performance = [None, None]
+
+ audio, fs = None, None
+ score_perf_map = None
+ aw = None
+ keep_zoom = False
+
+ fig, axs = plt.subplots(len(PERF_CODEC.parameter_names),
+ sharex=True,
+ gridspec_kw={'hspace': 0.15})
+ plt.subplots_adjust(left=0.07, right=0.99, top=.99, bottom=0.1)
+
+ def update_current_perf(info, item):
+ nonlocal current_performance
+ if item == 'piece':
+ current_performance[0] = info['new']
+ else:
+ current_performance[1] = info['new']
+ set_performance(*current_performance)
+
+ def set_performance(piece, performer):
+ nonlocal audio, fs, score_perf_map, aw
+
+ audio, fs = load_performance_audio(piece, performer)
+ score_perf_map = show_performance(piece, performer, fig, axs, keep_zoom)
+
+ if keep_zoom:
+ s, e = axs[0].get_xlim()
+ start = max(0, int(score_perf_map(s)*fs))
+ end = min(len(audio), int(score_perf_map(e)*fs))
+ excerpt = audio[start:end]
+ else:
+ excerpt = audio
+ if aw is None:
+ aw = display(Audio(data=excerpt, rate=fs, autoplay=True), display_id=True)
+ else:
+ aw.update(Audio(data=excerpt, rate=fs, autoplay=True))
+
+ def set_keep_zoom(v):
+ nonlocal keep_zoom
+ keep_zoom = v['new']
+
+ def do_reset_zoom(v):
+ nonlocal axs, fig
+ for ax in axs:
+ ax.autoscale()
+ ax.autoscale_view()
+ fig.canvas.draw()
+
+ piece_dd.observe(partial(update_current_perf, item='piece'), names=['value'])
+ performer_dd.observe(partial(update_current_perf, item='performer'), names=['value'])
+ keep_lims_chbox.observe(set_keep_zoom, names=['value'])
+ reset_lims.on_click(do_reset_zoom)
+
+ display(widgets.HBox([piece_dd, performer_dd, keep_lims_chbox, reset_lims]))
+ display(status)
+
+ set_performance(*current_performance)
+
+ cursor = []
+ play_range = [None, None]
+ thread_stop = None
+
+ def on_mouse_down(event):
+ nonlocal play_range, thread_stop
+ if thread_stop:
+ thread_stop.set()
+ play_range[0] = event.xdata
+
+ def on_mouse_up(event):
+ nonlocal play_range, cursor, thread_stop
+ play_range[1] = event.xdata
+ play_range.sort()
+
+ while cursor:
+ cursor.pop().remove()
+
+ for ax in axs:
+ cursor.append(ax.fill_betweenx(ax.get_ylim(), play_range[0], play_range[1], alpha=.2, color='gray'))
+
+ fig.canvas.draw()
+
+ start = max(0, int(score_perf_map(play_range[0])*fs))
+ end = min(len(audio), int(score_perf_map(play_range[1])*fs))
+ aw.display(Audio(data=audio[start:end], rate=fs, autoplay=True))
+
+ # duration = play_range[1] - play_range[0]
+ # thread_stop = threading.Event()
+ # thread = threading.Thread(
+ # target=time_cursor_thread,
+ # args=(fig, axs[0], play_range[0], play_range[1], duration, thread_stop))
+ # thread.start()
+
+ cid1 = fig.canvas.mpl_connect('button_press_event', on_mouse_down)
+ cid2 = fig.canvas.mpl_connect('button_release_event', on_mouse_up)
+
+
+def time_cursor_thread(fig, ax, start, end, duration, ev, rate=1):
+
+ color='black'
+ x = start
+ vline = ax.axvline(x, c=color)
+ delta_x = (end-start)/(duration*rate)
+ delta_t = 1/rate
+
+ while not ev.is_set() and x < end:
+ fig.canvas.draw()
+ vline.set(xdata=np.array([x, x]))
+ # fig.canvas.blit(ax.bbox) # doesn't reliably update
+ x += delta_x
+ time.sleep(delta_t)
+
+ vline.remove()
+ fig.canvas.draw()
+
+
+def to_matched_score(note_pairs, beat_map):
+ ms = []
+ for sn, n in note_pairs:
+ sn_on, sn_off = beat_map([sn.start.t, sn.end.t])
+ sn_dur = sn_off - sn_on
+ n_dur = n['sound_off'] - n['note_on']
+ ms.append((sn_on, sn_dur, sn.midi_pitch, n['note_on'], n_dur, n['velocity']))
+ fields = [('onset', 'f4'), ('duration', 'f4'), ('pitch', 'i4'),
+ ('p_onset', 'f4'), ('p_duration', 'f4'), ('velocity', 'i4')]
+ return np.array(ms, dtype=fields)
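
A sketch of feeding to_matched_score from the alignment data above (piece and performer ids hypothetical; assumes init_dataset() has run). It flattens matched (score note, performed note) pairs into one structured array per performance:

    from basismixer.helper.visualization import get_performance_info, to_matched_score

    part, ppart, alignment = get_performance_info('mozart_k331_1st', 'p01')
    part_by_id = {n.id: n for n in part.notes_tied}
    ppart_by_id = {n['id']: n for n in ppart.notes}
    pairs = [(part_by_id[a['score_id']], ppart_by_id[a['performance_id']])
             for a in alignment
             if a['label'] == 'match' and a['score_id'] in part_by_id]
    ms = to_matched_score(pairs, part.beat_map)
    print(ms[['onset', 'p_onset', 'velocity']][:5])
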
diff --git a/basismixer/parse_tsv_alignment.py b/basismixer/parse_tsv_alignment.py
new file mode 100644
index 0000000..3b4d200
--- /dev/null
+++ b/basismixer/parse_tsv_alignment.py
@@ -0,0 +1,195 @@
+from partitura.utils import ensure_notearray
+import numpy as np
+import os
+
+
+def alignment_dicts_to_array(alignment):
+ """
+ Create a structured array from an alignment given as a list of dicts.
+
+ Parameters
+ ----------
+ alignment : list
+ A list of note alignment dictionaries.
+
+ Returns
+ -------
+ alignarray : structured ndarray
+ Structured array containing note alignment.
+ """
+ fields = [('idx', 'i4'),
+ ('matchtype', 'U256'),
+ ('partid', 'U256'),
+ ('ppartid', 'U256')]
+
+ array = []
+ # for all dicts create an appropriate entry in an array:
+ # match = 0, deletion = 1, insertion = 2
+ for no, i in enumerate(alignment):
+ if i["label"] == "match":
+ array.append((no, "0", i["score_id"], str(i["performance_id"])))
+ elif i["label"] == "insertion":
+ array.append((no, "2", "undefined", str(i["performance_id"])))
+ elif i["label"] == "deletion":
+ array.append((no, "1", i["score_id"], "undefined"))
+ alignarray = np.array(array, dtype=fields)
+
+ return alignarray
+
+
+def save_csv_for_parangonada(outdir, part, ppart, align,
+ zalign=None, feature=None):
+ """
+ Save an alignment for visualization with Parangonada.
+
+ Parameters
+ ----------
+ outdir : str
+ A directory to save the files into.
+ part : Part, structured ndarray
+ A score part or its note_array.
+ ppart : PerformedPart, structured ndarray
+ A PerformedPart or its note_array.
+ align : list
+ A list of note alignment dictionaries.
+ zalign : list, optional
+ A second list of note alignment dictionaries.
+ feature : list, optional
+ A list of expressive feature dictionaries.
+
+ """
+
+ part = ensure_notearray(part)
+ ppart = ensure_notearray(ppart)
+
+ ffields = [('velocity', ' 1, all quirk?
+ alignlist.append({"label": "match", "score_id": field0, "performance_id": fields[1]})
+ elif fields[0] == "insertion":
+ alignlist.append({"label": "insertion", "performance_id": fields[1]})
+ elif fields[0][0] == "n" and fields[1].startswith("deletion"):
+ field0 = fields[0]#.split("-")[0]
+ alignlist.append({"label": "deletion", "score_id": field0})
+ else:
+ raise Exception(f"Unknown alignment type: {fields[0]}")
+
+ return alignlist
\ No newline at end of file
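
For illustration, the encoding used by alignment_dicts_to_array (match=0, deletion=1, insertion=2) on a hand-made alignment:

    from basismixer.parse_tsv_alignment import alignment_dicts_to_array

    align = [{'label': 'match', 'score_id': 'n1', 'performance_id': 'P01_n1'},
             {'label': 'deletion', 'score_id': 'n2'},
             {'label': 'insertion', 'performance_id': 'P01_n3'}]
    arr = alignment_dicts_to_array(align)
    print(arr['matchtype'])   # ['0' '1' '2']
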
diff --git a/basismixer/predictive_models/architectures.py b/basismixer/predictive_models/architectures.py
index 65e0ed1..175ce49 100644
--- a/basismixer/predictive_models/architectures.py
+++ b/basismixer/predictive_models/architectures.py
@@ -81,6 +81,7 @@ def __init__(self,
input_size, output_size,
recurrent_size, hidden_size,
n_layers=1, dropout=0.0,
+ recurrent_unit='GRU',
dense_nl=nn.ReLU(),
bidirectional=True,
batch_first=True,
@@ -102,7 +103,15 @@ def __init__(self,
self.n_layers = n_layers
self.batch_first = batch_first
self.bidirectional = bidirectional
- self.rnn = nn.GRU(input_size, self.recurrent_size,
+ self.recurrent_unit = recurrent_unit
+ if recurrent_unit == 'GRU':
+ recurrent_unit = nn.GRU
+ elif recurrent_unit == 'LSTM':
+ recurrent_unit = nn.LSTM
+ else:
+ raise Exception(recurrent_unit + " is not supported as a recurrent unit")
+
+ self.rnn = recurrent_unit(input_size, self.recurrent_size,
self.n_layers,
batch_first=batch_first, dropout=dropout,
bidirectional=self.bidirectional)
@@ -117,19 +126,22 @@ def __init__(self,
if self.output_names is None:
self.output_names = [str(i) for i in range(self.output_size)]
- def init_hidden(self, batch_size):
+ def init_hidden(self, x):
if self.bidirectional:
n_layers = 2 * self.n_layers
else:
n_layers = self.n_layers
- return torch.zeros(n_layers, batch_size, self.recurrent_size)
+ if self.recurrent_unit == 'LSTM':
+ return (torch.zeros(n_layers, x.size(0), self.recurrent_size).type(x.type()),
+ torch.zeros(n_layers, x.size(0), self.recurrent_size).type(x.type()))
+ return torch.zeros(n_layers, x.size(0), self.recurrent_size).type(x.type())
@standardize
def forward(self, x):
batch_size = x.size(0)
seq_len = x.size(1)
- h0 = self.init_hidden(batch_size).type(x.type())
- # tensor of shape (batch_size, seq_len, hidden_size*2) if bidirectional
+ h0 = self.init_hidden(x)
+ # tensor of shape (batch_size, seq_len, hidden_size*2) if bidirectional, tuple of 2 tensors if LSTM
output, h = self.rnn(x, h0)
flatten_shape = (self.recurrent_size * 2
if self.bidirectional else self.recurrent_size)
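
The new recurrent_unit switch in use; a sketch assuming the remaining constructor defaults are acceptable:

    from basismixer.predictive_models import RecurrentModel

    model = RecurrentModel(input_size=10, output_size=3,
                           recurrent_size=128, hidden_size=64,
                           recurrent_unit='LSTM')   # 'GRU' (default) or 'LSTM'
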
diff --git a/basismixer/predictive_models/base.py b/basismixer/predictive_models/base.py
index e486631..f2cc9db 100644
--- a/basismixer/predictive_models/base.py
+++ b/basismixer/predictive_models/base.py
@@ -274,3 +274,11 @@ def dtype(self):
def dtype(self, dtype):
self._dtype = dtype
self.type(dtype)
+
+ def to(self, *args, **kwargs):
+ result = super().to(*args, **kwargs)
+ try:
+ self.device = next(result.parameters()).device
+ except StopIteration:
+ pass  # no parameters, so there is no device to update
+ return result
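
The to() override keeps self.device in sync with wherever the parameters actually live, so later tensor creation can target the right device. Roughly, for any model derived from this base class:

    import torch

    model = model.to('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(model.device)   # reflects the device of the model parameters
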
diff --git a/basismixer/predictive_models/train.py b/basismixer/predictive_models/train.py
index f16321f..0863b86 100644
--- a/basismixer/predictive_models/train.py
+++ b/basismixer/predictive_models/train.py
@@ -8,7 +8,7 @@
from torch.utils.data import Dataset, Sampler
import torch.nn.functional as functional
from tqdm import tqdm
-
+import sys
LOGGER = logging.getLogger(__name__)
@@ -170,19 +170,20 @@ def compute_data_stats(self):
# self.model.out_std = out_std
- def train(self):
+ def train(self, fold=None):
self.compute_data_stats()
train_loss_name = getattr(self.train_loss, 'name', 'Train Loss')
- train_fn = os.path.join(self.out_dir, 'train_loss.txt')
+
+ fold = '' if fold is None else fold
+ train_fn = os.path.join(self.out_dir, f'train_loss{fold}.txt')
# Initialize TrainProgressMonitors
train_losses = TrainProgressMonitor(train_loss_name,
fn=train_fn)
valid_loss_name = None
valid_losses = None
if self.valid_dataloader is not None:
- valid_fn = os.path.join(self.out_dir, 'valid_loss.txt')
+ valid_fn = os.path.join(self.out_dir, f'valid_loss{fold}.txt')
if isinstance(self.valid_loss, (list, tuple)):
valid_loss_name = [getattr(crit, 'name', 'Valid Loss {0}'.format(i))
for i, crit in enumerate(self.valid_loss)]
@@ -194,21 +195,26 @@ def train(self):
validations_wo_improvement = 0
+ r2 = None
+ if self.valid_dataloader is not None:
+ vl, r2 = self.valid_step(0)
+ valid_losses.update(0, vl, r2)
+ LOGGER.info('valid loss before training: ' + valid_losses.last_loss + ' r2: ' + str(r2))
+
# save before training
self.save_checkpoint(-1, False, True)
try:
for epoch in range(self.start_epoch, self.epochs):
tl = self.train_step(epoch)
- train_losses.update(epoch, tl)
+ train_losses.update(epoch, tl, r2)
do_checkpoint = np.mod(epoch + 1, self.save_freq) == 0
if do_checkpoint:
if self.valid_dataloader is not None:
- vl = self.valid_step(epoch)
- valid_losses.update(epoch, vl)
- LOGGER.info(train_losses.last_loss + '\t' + valid_losses.last_loss)
+ vl, r2 = self.valid_step(epoch)
+ valid_losses.update(epoch, vl, r2)
+ LOGGER.info('t_loss:' + train_losses.last_loss + '\t v_loss:' + valid_losses.last_loss +
+ '\t r2:' + str(r2))
else:
vl = [tl]
LOGGER.info(train_losses.last_loss)
@@ -298,6 +304,7 @@ def __init__(self, name='', fn='/tmp/train_progres.txt',
self.name = name
self.losses = []
self.epochs = []
+ self.correlations = []
self.fn = fn
self.show_fmt = show_fmt
self.write_fmt = write_fmt
@@ -316,13 +323,13 @@ def __init__(self, name='', fn='/tmp/train_progres.txt',
f.write(header)
- def update(self, epoch, loss):
+ def update(self, epoch, loss, correlations=None):
"""
Append new loss(es) and update the log file
"""
self.losses.append(loss)
-
self.epochs.append(epoch)
+ self.correlations.append(correlations)
self.update_log()
@@ -348,6 +355,13 @@ def update_log(self):
else:
out_str = self.write_fmt.format(float(self.losses[-1]))
+ if self.correlations[-1] is not None:
+ out_str += '\t r2:'
+ if isinstance(self.correlations[-1], (list, tuple, np.ndarray)):
+ out_str += '\t'.join([self.write_fmt.format(l) for l in self.correlations[-1]])
+ else:
+ out_str += self.write_fmt.format(float(self.correlations[-1]))
+
with open(self.fn, 'a') as f:
f.write('{0}\t{1}\n'.format(self.epochs[-1], out_str))
@@ -364,7 +378,8 @@ def __init__(self, model, train_loss, optimizer,
save_freq=10,
early_stopping=100,
out_dir='.',
- resume_from_saved_model=None):
+ resume_from_saved_model=None,
+ **rest):
super().__init__(model=model,
train_loss=train_loss,
optimizer=optimizer,
@@ -409,6 +424,7 @@ def train_step(self, epoch, *args, **kwargs):
self.optimizer.zero_grad()
loss.backward()
+ torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
self.optimizer.step()
if self.lr_scheduler is not None:
@@ -416,10 +432,22 @@ def train_step(self, epoch, *args, **kwargs):
return np.mean(losses)
+ @staticmethod
+ def r2(preds, targets):
+ # Pearson correlation along the sequence dimension, averaged over the batch
+ pm = torch.mean(preds, 1, keepdim=True)
+ tm = torch.mean(targets, 1, keepdim=True)
+ n = (preds - pm) * (targets - tm)
+ n = n.sum(1)
+
+ d = torch.sqrt(((preds - pm)**2).sum(1) * ((targets - tm)**2).sum(1))
+
+ return (n / d).mean(0)
+
def valid_step(self, *args, **kwargs):
self.model.eval()
losses = []
+ correlations = []
with torch.no_grad():
for input, target in self.valid_dataloader:
@@ -439,7 +467,9 @@ def valid_step(self, *args, **kwargs):
loss = [self.valid_loss(output, target)]
losses.append([l.item() for l in loss])
- return np.mean(losses, axis=0)
+ correlations.append(self.r2(output, target).cpu().numpy())
+
+ return np.mean(losses, axis=0), np.ma.masked_invalid(correlations).mean(0)
class MSELoss(nn.Module):
@@ -447,3 +477,23 @@ class MSELoss(nn.Module):
def __call__(self, predictions, targets):
return functional.mse_loss(predictions, targets)
+
+
+class MultiMSELoss(nn.Module):
+
+ WEIGHTS = {'velocity_dev': 1000, 'timing': 1000, 'articulation_log': 0.5, 'velocity_trend': 10, 'beat_period_standardized': 1,
+ 'beat_period_mean': 1, 'beat_period_std': 1}
+
+ def __init__(self, targets):
+ super(MultiMSELoss, self).__init__()
+ self.task_num = len(targets)
+ self.log_vars = nn.Parameter(torch.zeros((self.task_num)))
+ self.weights = [self.WEIGHTS[t] for t in targets]
+
+ def forward(self, preds, targets):
+ loss = 0
+
+ for i, v in enumerate(self.log_vars):
+ loss += torch.exp(-v) * self.weights[i] * functional.mse_loss(preds[..., i], targets[..., i]) + v
+
+ return loss
\ No newline at end of file
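
MultiMSELoss appears to follow the learned homoscedastic-uncertainty weighting of Kendall et al. (2018): per task i, the MSE is scaled by exp(-s_i) times a fixed weight and regularized by adding s_i, where the log-variances s_i are learnable. A minimal sketch (the log-variances must be handed to the optimizer together with the model, as bin/train_model_example.py below does):

    import torch
    from basismixer.predictive_models.train import MultiMSELoss

    loss_fn = MultiMSELoss(['velocity_dev', 'timing'])
    preds = torch.randn(8, 50, 2)      # (batch, seq_len, n_targets)
    targets = torch.randn(8, 50, 2)
    loss = loss_fn(preds, targets)
    loss.backward()                    # gradients also reach loss_fn.log_vars
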
diff --git a/basismixer/utils/__init__.py b/basismixer/utils/__init__.py
index e4e3321..d7bf78b 100644
--- a/basismixer/utils/__init__.py
+++ b/basismixer/utils/__init__.py
@@ -5,7 +5,9 @@
save_pyc_bz,
to_memmap,
pair_files,
- clip)
+ clip,
+ split_datasets_by_piece,
+ prepare_datasets_for_model)
from .music import (
get_unique_onset_idxs,
diff --git a/basismixer/utils/generic.py b/basismixer/utils/generic.py
index 7965c2a..b764a93 100644
--- a/basismixer/utils/generic.py
+++ b/basismixer/utils/generic.py
@@ -6,6 +6,7 @@
from collections import defaultdict
import numpy as np
+from torch.utils.data import ConcatDataset
def load_pyc_bz(fn):
@@ -129,5 +130,57 @@ def clip(v, low=0, high=127):
v[too_high] = high
+def split_datasets_by_piece(datasets, fold=0, folds=5, dataset_name='magaloff'):
+ from partitura.utils import partition
+ from pandas_ods_reader import read_ods
-
+ if dataset_name == 'asap':
+ ods = read_ods("../basismixer/assets/perfwise_insertions_deletions.ods")
+
+ relevant = ods.values[:, :2]
+ robust = [r[0].split('asap-dataset\\')[1] for r in relevant if r[1] in ['c']] # , 'c + highs', 'c + ornaments'
+
+ robust_performances = []
+ for d in datasets:
+ for r in robust:
+ if d.perf_name in r and d.name in r:
+ robust_performances.append(d)
+ datasets = robust_performances
+
+ by_piece = partition(lambda d: d.name, datasets)
+ pieces = list(by_piece.keys())
+
+ RNG = np.random.RandomState(1984)
+ RNG.shuffle(pieces)
+
+
+ test_size = 1 / folds
+ n_test = max(1, int(np.round(test_size*len(pieces))))
+ n_train = len(pieces) - n_test
+
+ if n_train < 1:
+ raise Exception('Not enough pieces to split datasets according '
+ 'to the specified test/validation proportions')
+
+ test_start = n_test * fold
+ test_end = n_test * (1 + fold)
+ test_pieces = pieces[test_start:test_end]
+ train_pieces = [p for p in pieces if p not in test_pieces]
+
+ test_set = [d for pd in [by_piece[p] for p in test_pieces] for d in pd]
+ train_set = [d for pd in [by_piece[p] for p in train_pieces] for d in pd]
+
+ return (ConcatDataset(train_set),
+ ConcatDataset(test_set))
+
+
+def prepare_datasets_for_model(datasets, model_config):
+ for bmds in datasets:
+ targets, output_names = [], []
+ for param in model_config['parameter_names']:
+ i = bmds.output_names.index(param)
+ targets.append(bmds.targets[:, i])
+ output_names.append(bmds.output_names[i])
+ bmds.targets, bmds.output_names = np.stack(targets, 1), np.array(output_names)
+
+ return datasets
\ No newline at end of file
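
A sketch of a deterministic k-fold loop over the splitter above (the RNG is reseeded inside the function, so the piece ordering is identical across calls); datasets is assumed to be a flat list of BasisMixerDataSet objects:

    from basismixer.utils import split_datasets_by_piece

    folds = 5
    for fold in range(folds):
        train_set, test_set = split_datasets_by_piece(datasets, fold, folds,
                                                      dataset_name='4x22')
        # train on train_set, evaluate on test_set
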
diff --git a/bin/cross_validate_model.py b/bin/cross_validate_model.py
new file mode 100644
index 0000000..1fc44bd
--- /dev/null
+++ b/bin/cross_validate_model.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import logging
+import os
+from functools import partialmethod
+
+import numpy as np
+import torch
+from partitura import save_performance_midi, save_match
+from torch.utils.data import DataLoader, ConcatDataset
+from tqdm import tqdm
+
+from basismixer.helper.rendering import compute_basis_from_xml, post_process_predictions
+from basismixer.performance_codec import get_performance_codec
+from basismixer.predictive_models.train import MultiMSELoss, MSELoss
+
+logging.basicConfig(level=logging.INFO)
+
+from basismixer.predictive_models import (construct_model,
+ SupervisedTrainer,
+ FullPredictiveModel)
+from basismixer.utils import load_pyc_bz, save_pyc_bz, split_datasets_by_piece, prepare_datasets_for_model
+from basismixer import make_datasets
+
+LOGGER = logging.getLogger(__name__)
+
+tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)
+
+# def my_basis(part):
+# W = np.array([n.midi_pitch for n in part.notes_tied]).astype(np.float)
+# return W.reshape((-1, 1)), ['my']
+
+basis_features = ['polynomial_pitch_feature', 'duration_feature', 'metrical_strength_feature']
+
+CONFIG = [
+ dict(onsetwise=False,
+ basis_functions=basis_features,
+ parameter_names=['velocity_dev', 'timing', 'articulation_log', 'velocity_trend', 'beat_period_standardized',
+ 'beat_period_mean', 'beat_period_std'],
+ seq_len=50,
+ model=dict(constructor=['basismixer.predictive_models', 'RecurrentModel'],
+ args=dict(recurrent_size=128,
+ n_layers=1,
+ hidden_size=64)),
+ train_args=dict(
+ optimizer_params=['Adam', dict(lr=1e-4)],
+ epochs=20,
+ save_freq=1,
+ early_stopping=100,
+ batch_size=128,
+ )
+ )
+]
+
+def jsonize_dict(input_dict):
+ out_dict = dict()
+ for k, v in input_dict.items():
+ if isinstance(v, np.ndarray):
+ out_dict[k] = v.tolist()
+ elif isinstance(v, dict):
+ out_dict[k] = jsonize_dict(v)
+ else:
+ out_dict[k] = v
+ return out_dict
+
+
+def render_fold_match(model, pieces, fold):
+ import warnings
+ predicter = FullPredictiveModel([model], in_names, out_names)
+ perf_codec = get_performance_codec(predicter.output_names)
+ for piece in pieces:
+ try:
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ if args.dataset_name == 'magaloff':
+ xml_fn = args.dataset_root_folder + 'xml/' + piece + '.xml'
+ elif args.dataset_name == 'asap':
+ xml_fn = args.dataset_root_folder + f'{piece}/xml_score.musicxml'
+ else:
+ xml_fn = args.dataset_root_folder + 'musicxml/' + piece + '.musicxml'
+ basis, part = compute_basis_from_xml(xml_fn, model.input_names)
+ onsets = np.array([n.start.t for n in part.notes_tied])
+ preds = predicter.predict(basis, onsets)
+ post_process_predictions(preds)
+ predicted_ppart = perf_codec.decode(part, preds)
+ out_folder = args.out_dir + f'/CV_fold_{fold}/'
+ os.makedirs(out_folder, exist_ok=True)
+ piece = piece.replace('/', '-')
+ save_performance_midi(predicted_ppart, out_folder + f'{piece}.mid')
+ alignment = [{'label': 'match', 'score_id': sn.id, 'performance_id': pn['id']} for sn, pn in zip(part.notes_tied, predicted_ppart.notes)]
+ save_match(alignment, predicted_ppart, part, out_folder + f'{piece}.match')
+ except Exception as e:
+ print(f"could not render {piece}")
+ print(e)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ description="Train a Model given a dataset")
+ parser.add_argument("dataset_name", choices=["asap", "4x22", "magaloff"], help="Folder with MusicXML files")
+ parser.add_argument("dataset_root_folder", help="Root folder of the dataset")
+ parser.add_argument("--folds", help="number of folds in CV", default=10)
+ parser.add_argument("--cache", help=(
+ 'Path to pickled datasets file. If specified and the file exists, '
+ 'and the cached data matches the model specs, ' # <-- todo
+ 'the `dataset_root_folder` option will be ignored'))
+ parser.add_argument("--pieces", help="Text file with valid pieces",
+ default=None)
+ parser.add_argument("--model-config", help="Model configuration",
+ default=CONFIG)
+ parser.add_argument("--out-dir", help="Output directory",
+ default='/tmp')
+ parser.add_argument('--targets', default=[], nargs='+')
+
+ args = parser.parse_args()
+
+ if args.targets:
+ CONFIG[0]["parameter_names"] = args.targets
+
+ folds = args.folds
+
+ # Load model architecture
+ if not isinstance(args.model_config, list):
+ model_config = json.load(open(args.model_config))
+ else:
+ model_config = args.model_config
+
+ if not os.path.exists(args.out_dir):
+ os.mkdir(args.out_dir)
+
+ json.dump(model_config,
+ open(os.path.join(args.out_dir, 'model_config.json'), 'w'),
+ indent=2)
+
+ if args.pieces is not None:
+ print('valid_pieces')
+ args.pieces = np.loadtxt(args.pieces, dtype=str)
+
+ rng = np.random.RandomState(1984)
+
+ datasets = []
+ models = []
+ target_idxs = []
+
+ if args.cache and os.path.exists(args.cache):
+ LOGGER.info('Loading data from {}'.format(args.cache))
+ datasets = list(load_pyc_bz(args.cache))
+ if args.targets:
+ datasets[0] = (datasets[0][0], datasets[0][1], args.targets)
+ else:
+ datasets = make_datasets(model_config,
+ args.dataset_root_folder,
+ args.dataset_name)
+ if args.cache:
+ LOGGER.info('Saving data to {}'.format(args.cache))
+ save_pyc_bz(datasets, args.cache)
+
+ for (mdatasets, in_names, out_names), config in zip(datasets, model_config):
+
+ mdatasets = prepare_datasets_for_model(mdatasets, config)
+ dataset = ConcatDataset(mdatasets)
+ batch_size = config['train_args'].pop('batch_size')
+
+ for fold in range(folds):
+ #### Create train and validation data loaders #####
+ train_set, test_set = split_datasets_by_piece(dataset.datasets, fold, folds, False)
+ train_loader, valid_loader = DataLoader(train_set, batch_size=batch_size), \
+ DataLoader(test_set, batch_size=batch_size)
+
+ #### Construct Models ####
+
+ model_cfg = config['model'].copy()
+ model_cfg['args']['input_names'] = in_names
+ model_cfg['args']['input_size'] = len(in_names)
+ model_cfg['args']['output_names'] = config['parameter_names']
+ model_cfg['args']['output_size'] = len(config['parameter_names'])
+ model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise'
+ model_name = ('-'.join(out_names) +
+ '-' + ('onsetwise' if config['onsetwise'] else 'notewise'))
+ model_out_dir = os.path.join(args.out_dir, model_name)
+ if not os.path.exists(model_out_dir):
+ os.mkdir(model_out_dir)
+ # save model config for later saving model
+ json.dump(jsonize_dict(model_cfg),
+ open(os.path.join(model_out_dir, 'config.json'), 'w'),
+ indent=2)
+ model = construct_model(model_cfg)
+
+ loss = MultiMSELoss(config['parameter_names']) if len(config['parameter_names']) > 1 else MSELoss()
+
+ ### Construct the optimizer ####
+ optim_name, optim_args = config['train_args']['optimizer_params']
+ optim = getattr(torch.optim, optim_name)
+ optim = optim(model.parameters(), **optim_args)
+
+ trainer = SupervisedTrainer(model=model,
+ train_loss=loss,
+ valid_loss=loss,
+ train_dataloader=train_loader,
+ valid_dataloader=valid_loader,
+ out_dir=model_out_dir,
+ optimizer=optim,
+ **config['train_args'])
+
+ trainer.train(f'_{fold}')
+
+ test_performance_names = set([t.name for t in test_set.datasets])
+ test_pieces = [p for p in mdatasets if p.name in test_performance_names]
+
+ render_fold_match(model, test_performance_names, fold)
+
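
Assuming the layout above, a cross-validation run could be launched along these lines (paths hypothetical; note the root folder needs a trailing slash, since the script concatenates rather than joins paths):

    python bin/cross_validate_model.py 4x22 /data/vienna4x22/ --folds 5 \
        --cache /tmp/4x22_data.pyc.bz --out-dir /tmp/cv_models \
        --targets timing velocity_trend
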
diff --git a/bin/load_alignment_example b/bin/load_alignment_example.py
old mode 100755
new mode 100644
similarity index 99%
rename from bin/load_alignment_example
rename to bin/load_alignment_example.py
index b10a81d..5ec0574
--- a/bin/load_alignment_example
+++ b/bin/load_alignment_example.py
@@ -1,17 +1,14 @@
#!/usr/bin/env python
import argparse
-import json
-import logging
import matplotlib.pyplot as plt
import numpy as np
-
import partitura
from partitura.utils import partition
-import basismixer.basisfunctions
-from basismixer.utils import pair_files
+
import basismixer.performance_codec as pc
+from basismixer.utils import pair_files
def main():
diff --git a/bin/load_data_example b/bin/load_data_example.py
old mode 100755
new mode 100644
similarity index 99%
rename from bin/load_data_example
rename to bin/load_data_example.py
index d4e5820..0f79bc3
--- a/bin/load_data_example
+++ b/bin/load_data_example.py
@@ -25,7 +25,7 @@
from basismixer.utils import save_pyc_bz
from basismixer.utils import pair_files
-
+import sys
LOGGER = logging.getLogger(__name__)
diff --git a/bin/make_basis_example b/bin/make_basis_example.py
old mode 100755
new mode 100644
similarity index 92%
rename from bin/make_basis_example
rename to bin/make_basis_example.py
index 608ae82..a277ca5
--- a/bin/make_basis_example
+++ b/bin/make_basis_example.py
@@ -1,23 +1,18 @@
#!/usr/bin/env python
import argparse
-import json
-import logging
-import numpy as np
import matplotlib.pyplot as plt
-import torch
-
+import numpy as np
import partitura
+import partitura.musicanalysis as ma
+import partitura.score
-import basismixer
-from basismixer.utils import to_memmap
-import basismixer.basisfunctions as bf
def main():
parser = argparse.ArgumentParser(description="Create basis functions for a MusicXML file")
parser.add_argument("musicxml", help="MusicXML file")
- parser.add_argument("--basis", type=str, nargs='+', help='names of one or more basis functions')
+ parser.add_argument("--basis", type=str, nargs='+', help='names of one or more basis features')
# parser.add_argument("--cachefolder", type=str, help='Cache folder')
# parser.add_argument("--basisconfig", type=str,
# help=("JSON file specifying a set of basis functions for each expressive target. "
@@ -28,8 +23,10 @@ def main():
# basis_names = list(set(i for ii in basis_config.values() for i in ii))
part = partitura.load_musicxml(args.musicxml)
+ part = partitura.score.merge_parts(part)
+ part = partitura.score.unfold_part_maximal(part, update_ids=False)
print(part.pretty())
- basis, names = bf.make_basis(part, args.basis)
+ basis, names = ma.make_note_feats(part, args.basis)
# plot
onsets = None # np.array([n.start.t for n in part.notes_tied])
plot_basis(basis, names, '/tmp/out.png', onsets, title=part.part_name)
diff --git a/bin/render_performance b/bin/render_performance.py
old mode 100755
new mode 100644
similarity index 97%
rename from bin/render_performance
rename to bin/render_performance.py
index 7787b07..69f0d34
--- a/bin/render_performance
+++ b/bin/render_performance.py
@@ -24,7 +24,7 @@
remove_grace_notes)
from basismixer import TOY_MODEL_CONFIG
-from basismixer.basisfunctions import make_basis
+from partitura.musicanalysis import make_note_feats
from basismixer.performance_codec import get_performance_codec
from basismixer.predictive_models import (FullPredictiveModel,
construct_model)
@@ -37,7 +37,9 @@
RENDER_CONFIG)
logging.basicConfig(level=logging.INFO)
+import sys
LOGGER = logging.getLogger(__name__)
+LOGGER.addHandler(logging.StreamHandler(sys.stdout))
def load_model(model_config, default_values=DEFAULT_VALUES):
@@ -134,7 +136,7 @@ def compute_basis_from_score(score_fn, input_names):
# part = unfold_part_maximal(part)
# Compute basis functions
- _basis, bf_names = make_basis(part, list(set([bf.split('.')[0] for bf in input_names])))
+ _basis, bf_names = make_note_feats(part, list(set([bf.split('.')[0] for bf in input_names])))
basis = np.zeros((len(_basis), len(input_names)))
for i, n in enumerate(input_names):
try:
@@ -146,7 +148,6 @@ def compute_basis_from_score(score_fn, input_names):
return basis, part
-
def predict(model_config, score_fn, default_values=DEFAULT_VALUES):
"""
Main method for predicting a performance.
diff --git a/bin/train_model_example b/bin/train_model_example
deleted file mode 100755
index 2b5eac6..0000000
--- a/bin/train_model_example
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import json
-import logging
-import os
-
-import numpy as np
-import torch
-
-from torch.utils.data.sampler import SubsetRandomSampler
-from torch.utils.data import DataLoader, ConcatDataset
-
-logging.basicConfig(level=logging.INFO)
-
-from basismixer.predictive_models import (construct_model,
- SupervisedTrainer,
- MSELoss)
-from basismixer.utils import load_pyc_bz, save_pyc_bz
-from basismixer import make_datasets
-
-LOGGER = logging.getLogger(__name__)
-
-# def my_basis(part):
-# W = np.array([n.midi_pitch for n in part.notes_tied]).astype(np.float)
-# return W.reshape((-1, 1)), ['my']
-
-CONFIG = [
- dict(onsetwise=False,
- basis_functions=['polynomial_pitch_basis',
- 'loudness_direction_basis',
- 'tempo_direction_basis',
- 'articulation_basis',
- 'duration_basis',
- # my_basis,
- 'grace_basis',
- 'slur_basis',
- 'fermata_basis',
- # 'metrical_basis'
- 'metrical_strength_basis',
- 'time_signature_basis',
- 'relative_score_position_basis'
- ],
- parameter_names=['velocity_dev', 'timing', 'articulation_log'],
- seq_len=1,
- model=dict(constructor=['basismixer.predictive_models', 'FeedForwardModel'],
- args=dict(hidden_size=128)),
- train_args=dict(
- optimizer=['Adam', dict(lr=1e-4)],
- epochs=10,
- save_freq=10,
- early_stopping=100,
- batch_size=1000,
- )
- ),
- dict(onsetwise=True,
- basis_functions=['polynomial_pitch_basis',
- 'loudness_direction_basis',
- 'tempo_direction_basis',
- 'articulation_basis',
- 'duration_basis',
- 'slur_basis',
- 'grace_basis',
- 'fermata_basis',
- # 'metrical_basis'
- 'metrical_strength_basis',
- 'time_signature_basis',
- 'relative_score_position_basis'
- ],
- parameter_names=['velocity_trend', 'beat_period_standardized',
- 'beat_period_mean', 'beat_period_std'],
- seq_len=100,
- model=dict(constructor=['basismixer.predictive_models', 'RecurrentModel'],
- args=dict(recurrent_size=128,
- n_layers=1,
- hidden_size=64)),
- train_args=dict(
- optimizer=['Adam', dict(lr=1e-4)],
- epochs=10,
- save_freq=5,
- early_stopping=100,
- batch_size=50,
- )
- )
-]
-
-def jsonize_dict(input_dict):
- out_dict = dict()
- for k, v in input_dict.items():
- if isinstance(v, np.ndarray):
- out_dict[k] = v.tolist()
- elif isinstance(v, dict):
- out_dict[k] = jsonize_dict(v)
- else:
- out_dict[k] = v
- return out_dict
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(
- description="Train a Model given a dataset")
- parser.add_argument("xmlfolder", help="Folder with MusicXML files")
- parser.add_argument("matchfolder", help="Folder with match files")
- parser.add_argument("--datasets", help=(
- 'Path to pickled datasets file. If specified and the file exists, '
- 'the `xmlfolder` and `matchfolder` options will be ignored, and it '
- 'will be assumed that datasets in the specified file correspond to '
- 'the model configuration. If specifed and the path does not exist, '
- 'the datasets are computed and saved to the specified path.'))
- parser.add_argument("--quirks", action='store_true',
- help="Use this option when training on magaloff/zeilinger")
- parser.add_argument("--pieces", help="Text file with valid pieces",
- default=None)
- parser.add_argument("--model-config", help="Model configuration",
- default=CONFIG)
- parser.add_argument("--out-dir", help="Output directory",
- default='/tmp')
- args = parser.parse_args()
-
- # Load model architecture
- if not isinstance(args.model_config, list):
- model_config = json.load(open(args.model_config))
- else:
- model_config = args.model_config
-
- if not os.path.exists(args.out_dir):
- os.mkdir(args.out_dir)
-
- json.dump(model_config,
- open(os.path.join(args.out_dir, 'model_config.json'), 'w'),
- indent=2)
-
- if args.pieces is not None:
- print('valid_pieces')
- args.pieces = np.loadtxt(args.pieces, dtype=str)
-
- rng = np.random.RandomState(1984)
-
- datasets = []
- models = []
- target_idxs = []
- input_idxs = []
- valid_size = 0.20
-
- if args.datasets and os.path.exists(args.datasets):
- LOGGER.info('Loading data from {}'.format(args.datasets))
- datasets = load_pyc_bz(args.datasets)
- else:
- datasets = make_datasets(model_config,
- args.xmlfolder,
- args.matchfolder,
- pieces=args.pieces,
- quirks=args.quirks)
- if args.datasets:
- LOGGER.info('Saving data to {}'.format(args.datasets))
- save_pyc_bz(datasets, args.datasets)
-
- for (mdatasets, in_names, out_names), config in zip(datasets, model_config):
- dataset = ConcatDataset(mdatasets)
- batch_size = config['train_args'].pop('batch_size')
-
- #### Create train and validation data loaders #####
- dataset_idx = np.arange(len(dataset))
- rng.shuffle(dataset_idx)
- len_valid = int(np.round(len(dataset) * valid_size))
- valid_idx = dataset_idx[0:len_valid]
- train_idx = dataset_idx[len_valid:]
-
- train_sampler = SubsetRandomSampler(train_idx)
- valid_sampler = SubsetRandomSampler(valid_idx)
- train_loader = DataLoader(dataset,
- batch_size=batch_size,
- sampler=train_sampler)
- valid_loader = DataLoader(dataset,
- batch_size=batch_size,
- sampler=valid_sampler)
-
- #### Construct Models ####
-
- model_cfg = config['model'].copy()
- model_cfg['args']['input_names'] = in_names
- model_cfg['args']['input_size'] = len(in_names)
- model_cfg['args']['output_names'] = out_names
- model_cfg['args']['output_size'] = len(out_names)
- model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise'
- model_name = ('-'.join(out_names) +
- '-' + ('onsetwise' if config['onsetwise'] else 'notewise'))
- model_out_dir = os.path.join(args.out_dir, model_name)
- if not os.path.exists(model_out_dir):
- os.mkdir(model_out_dir)
- # save model config for later saving model
- json.dump(jsonize_dict(model_cfg),
- open(os.path.join(model_out_dir, 'config.json'), 'w'),
- indent=2)
- model = construct_model(model_cfg)
-
- loss = MSELoss()
-
- ### Construct the optimizer ####
- optim_name, optim_args = config['train_args']['optimizer']
- optim = getattr(torch.optim, optim_name)
- config['train_args']['optimizer'] = optim(model.parameters(), **optim_args)
-
- trainer = SupervisedTrainer(model=model,
- train_loss=loss,
- valid_loss=loss,
- train_dataloader=train_loader,
- valid_dataloader=valid_loader,
- out_dir=model_out_dir,
- **config['train_args'])
-
- trainer.train()
-
diff --git a/bin/train_model_example.py b/bin/train_model_example.py
new file mode 100644
index 0000000..2c5eaf9
--- /dev/null
+++ b/bin/train_model_example.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import logging
+import os
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader, ConcatDataset
+
+from basismixer.predictive_models.train import MultiMSELoss
+
+logging.basicConfig(level=logging.INFO)
+
+from basismixer.predictive_models import (construct_model,
+ SupervisedTrainer)
+from basismixer.utils import load_pyc_bz, save_pyc_bz, split_datasets_by_piece, prepare_datasets_for_model
+from basismixer import make_datasets
+
+LOGGER = logging.getLogger(__name__)
+
+# def my_basis(part):
+# W = np.array([n.midi_pitch for n in part.notes_tied]).astype(np.float)
+# return W.reshape((-1, 1)), ['my']
+
+basis_features = ['polynomial_pitch_feature', 'duration_feature', 'metrical_strength_feature']
+
+CONFIG = [
+ dict(onsetwise=False,
+ basis_functions=basis_features,
+ parameter_names=['velocity_dev', 'timing', 'articulation_log', 'velocity_trend'],
+ seq_len=50,
+ model=dict(constructor=['basismixer.predictive_models', 'RecurrentModel'],
+ args=dict(recurrent_size=128,
+ n_layers=1,
+ hidden_size=64)),
+ train_args=dict(
+ optimizer=['Adam', dict(lr=1e-4)],
+ epochs=10,
+ save_freq=1,
+ early_stopping=100,
+ batch_size=50,
+ )
+ )
+]
+
+def jsonize_dict(input_dict):
+ out_dict = dict()
+ for k, v in input_dict.items():
+ if isinstance(v, np.ndarray):
+ out_dict[k] = v.tolist()
+ elif isinstance(v, dict):
+ out_dict[k] = jsonize_dict(v)
+ else:
+ out_dict[k] = v
+ return out_dict
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ description="Train a Model given a dataset")
+ parser.add_argument("dataset_name", choices=["asap", "4x22", "magaloff"], help="Folder with MusicXML files")
+ parser.add_argument("dataset_root_folder", help="Root folder of the dataset")
+ parser.add_argument("--cache", help=(
+ 'Path to pickled datasets file. If specified and the file exists, '
+ 'and the cached data matches the model specs, ' # <-- todo
+ 'the `dataset_root_folder` option will be ignored'))
+ parser.add_argument("--pieces", help="Text file with valid pieces",
+ default=None)
+ parser.add_argument("--model-config", help="Model configuration",
+ default=CONFIG)
+ parser.add_argument("--out-dir", help="Output directory",
+ default='/tmp')
+ args = parser.parse_args()
+
+ # Load model architecture
+ if not isinstance(args.model_config, list):
+ model_config = json.load(open(args.model_config))
+ else:
+ model_config = args.model_config
+
+ if not os.path.exists(args.out_dir):
+ os.mkdir(args.out_dir)
+
+ json.dump(model_config,
+ open(os.path.join(args.out_dir, 'model_config.json'), 'w'),
+ indent=2)
+
+ if args.pieces is not None:
+ print('valid_pieces')
+ args.pieces = np.loadtxt(args.pieces, dtype=str)
+
+ rng = np.random.RandomState(1984)
+
+ datasets = []
+ models = []
+ target_idxs = []
+
+ if args.cache and os.path.exists(args.cache):
+ LOGGER.info('Loading data from {}'.format(args.cache))
+ datasets = load_pyc_bz(args.cache)
+ else:
+ datasets = make_datasets(model_config,
+ args.dataset_root_folder,
+ args.dataset_name)
+ if args.cache:
+ LOGGER.info('Saving data to {}'.format(args.cache))
+ save_pyc_bz(datasets, args.cache)
+
+ for (mdatasets, in_names, out_names), config in zip(datasets, model_config):
+ mdatasets = prepare_datasets_for_model(mdatasets, config)
+ dataset = ConcatDataset(mdatasets)
+ batch_size = config['train_args'].pop('batch_size')
+
+ #### Create train and validation data loaders #####
+ train_set, test_set = split_datasets_by_piece(dataset.datasets, 0, 5, False)
+ train_loader, valid_loader = DataLoader(train_set, batch_size=batch_size), \
+ DataLoader(test_set, batch_size=batch_size)
+
+ #### Construct Models ####
+
+ model_cfg = config['model'].copy()
+ model_cfg['args']['input_names'] = in_names
+ model_cfg['args']['input_size'] = len(in_names)
+ model_cfg['args']['output_names'] = config['parameter_names']
+ model_cfg['args']['output_size'] = len(config['parameter_names'])
+ model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise'
+ model_name = ('-'.join(out_names) +
+ '-' + ('onsetwise' if config['onsetwise'] else 'notewise'))
+ model_out_dir = os.path.join(args.out_dir, model_name)
+ if not os.path.exists(model_out_dir):
+ os.mkdir(model_out_dir)
+ # save model config for later saving model
+ json.dump(jsonize_dict(model_cfg),
+ open(os.path.join(model_out_dir, 'config.json'), 'w'),
+ indent=2)
+ model = construct_model(model_cfg)
+
+ loss = MultiMSELoss(config['parameter_names'])  # alternative: MSELoss()
+
+ ### Construct the optimizer ####
+ optim_name, optim_args = config['train_args']['optimizer']
+ optim = getattr(torch.optim, optim_name)
+ config['train_args']['optimizer'] = optim(list(model.parameters()) + list(loss.parameters()), **optim_args)
+
+ trainer = SupervisedTrainer(model=model,
+ train_loss=loss,
+ valid_loss=loss,
+ train_dataloader=train_loader,
+ valid_dataloader=valid_loader,
+ out_dir=model_out_dir,
+ **config['train_args'])
+
+ trainer.train()
+
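
One design note on the script above: MultiMSELoss stores its per-task log-variances as nn.Parameters, which is why list(model.parameters()) + list(loss.parameters()) is passed to the optimizer; leaving the loss parameters out would freeze the task weighting at its initial (uniform) value.
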
diff --git a/environment.yml b/environment.yml
index 39dbd60..090f28d 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,5 +18,5 @@ dependencies:
- pip:
- appdirs
- soundfile
- - git+https://github.com/OFAI/partitura.git@develop
- - git+https://github.com/OFAI/basismixer.git@develop
+ - git+https://github.com/OFAI/partitura.git@main
+ - git+https://github.com/OFAI/basismixer.git@asap
diff --git a/requirements.txt b/requirements.txt
index 7068e4e..a50eb87 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,4 @@ scipy>=1.2
numpy>=1.16
torch>=1.3
tqdm>=4.43
-git+https://github.com/OFAI/partitura.git@develop
+git+https://github.com/OFAI/partitura.git@main
diff --git a/setup.py b/setup.py
index ef8634d..29b2d2b 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
EMAIL = ''
AUTHOR = 'Carlos Cancino-Chacón, Maarten Grachten'
REQUIRES_PYTHON = '>=3.5'
-VERSION = '0.1.0'
+VERSION = '0.1.1'
# What packages are required for this module to be executed?
REQUIRED = [