diff --git a/README.md b/README.md index aab7c45..d840eaf 100644 --- a/README.md +++ b/README.md @@ -113,3 +113,8 @@ If you want to include any of these files (or a variation or modification thereo ## Acknowledgements This research has received funding from the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation programme under grant agreement No. 670035 (project [*Con Espressione*](https://www.jku.at/en/institute-of-computational-perception/research/projects/con-espressione/)). + +

+ +

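The next hunk replaces the package's public `make_basis` export with `make_datasets` from `basismixer.data`. For orientation, a minimal sketch of driving the new entry point follows; the model-spec keys mirror the docstring removed in the `data.py` hunk further down, while the dataset path and the basis-function and parameter names are illustrative placeholders, not values guaranteed by this patch.

from basismixer import make_datasets

# One model spec per predictive model; keys as documented in basismixer.data:
# 'onsetwise', 'basis_functions', 'parameter_names', 'seq_len'.
model_specs = [{
    'onsetwise': False,
    'basis_functions': ['polynomial_pitch_basis', 'duration_basis'],  # placeholder names
    'parameter_names': ['velocity_trend', 'beat_period'],
    'seq_len': 1,
}]

# make_datasets now takes a dataset root folder and a dataset name
# ('4x22', 'magaloff' or 'asap') instead of separate xml/match folders.
for datasets, input_names, output_names in make_datasets(
        model_specs, '/path/to/vienna4x22/', dataset_name='4x22'):
    print(len(datasets), len(input_names), len(output_names))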
+ diff --git a/basismixer/__init__.py b/basismixer/__init__.py index a2f4efa..3322e4e 100644 --- a/basismixer/__init__.py +++ b/basismixer/__init__.py @@ -11,8 +11,7 @@ OnsetwiseDecompositionDynamicsCodec, TimeCodec, ) -from basismixer.basisfunctions import make_basis -# from basismixer.data import make_dataset + from basismixer.data import make_datasets # define a version variable diff --git a/basismixer/assets/perfwise_insertions_deletions.ods b/basismixer/assets/perfwise_insertions_deletions.ods new file mode 100644 index 0000000..11016f6 Binary files /dev/null and b/basismixer/assets/perfwise_insertions_deletions.ods differ diff --git a/basismixer/basisfunctions.py b/basismixer/basisfunctions.py deleted file mode 100644 index da26c1f..0000000 --- a/basismixer/basisfunctions.py +++ /dev/null @@ -1,1419 +0,0 @@ -#!/usr/bin/env python - -import sys -import logging -import numpy as np -from scipy.interpolate import interp1d -import types - -import partitura.score as score - -LOGGER = logging.getLogger(__name__) - - -class InvalidBasisException(Exception): - pass - - -def print_basis_functions(): - """Print a list of all basisfunction names defined in this module, - with descriptions where available. - - """ - module = sys.modules[__name__] - doc_indent = 4 - for name in list_basis_functions(): - print('* {}'.format(name)) - member = getattr(sys.modules[__name__], name) - if member.__doc__: - print(' ' * doc_indent + member.__doc__.replace('\n', ' ' * doc_indent + '\n')) - - -def list_basis_functions(): - """Return a list of all basisfunction names defined in this module. - - The basisfunction names listed here can be specified by name in - the `make_basis` function. For example: - - >>> basis, names = make_basis(part, ['metrical_basis', 'articulation_basis']) - - Returns - ------- - list - A list of strings - - """ - module = sys.modules[__name__] - bfs = [] - exclude = {'make_basis'} - for name in dir(module): - if name in exclude: - continue - member = getattr(sys.modules[__name__], name) - if isinstance(member, types.FunctionType) and name.endswith('_basis'): - bfs.append(name) - return bfs - - -def make_basis(part, basis_functions): - """Compute the specified basis functions for a part. - - The function returns the computed basis functions as a N x M - array, where N equals `len(part.notes_tied)` and M equals the - total number of descriptors of all basis functions that occur in - part. - - Furthermore the function returns the names of the basis functions. - A list of strings of size M. The names have the name of the - function prepended to the name of the descriptor. For example if a - function named `abc_basis` returns descriptors `a`, `b`, and `c`, - then the list of names returned by `make_basis(part, - ['abc_basis'])` will be ['abc_basis.a', 'abc_basis.b', - 'abc_basis.c']. - - Parameters - ---------- - part : Part - The score as a Part instance - basis_functions : list - A list of basis functions. Elements of the list can be either - the functions themselves or the names of a basis function as - strings (or a mix). The basis functions specified by name are - looked up in the `basismixer.basisfunctions` module. 
- - Returns - ------- - basis : ndarray - The basis functions - names : list - The basis names - - """ - - acc = [] - - for bf in basis_functions: - - if isinstance(bf, str): - # get function by name from module - func = getattr(sys.modules[__name__], bf) - elif isinstance(bf, types.FunctionType): - func = bf - else: - LOGGER.warning('Ignoring unknown basis function {}'.format(bf)) - - bf, bn = func(part) - - # check if the size and number of the basis function are correct - if bf.shape[1] != len(bn): - msg = ('number of basis names {} does not equal ' - 'number of basis {}'.format(len(bn), bf.shape[1])) - raise InvalidBasisException(msg) - n_notes = len(part.notes_tied) - if len(bf) != n_notes: - msg = ('length of basis {} does not equal ' - 'number of notes {}'.format(len(bf), n_notes)) - raise InvalidBasisException(msg) - - if np.any(np.logical_or(np.isnan(bf), np.isinf(bf))): - problematic = np.unique(np.where(np.logical_or(np.isnan(bf), np.isinf(bf)))[1]) - msg = ('NaNs or Infs found in the following basis: {} ' - .format(', '.join(np.array(bn)[problematic]))) - raise InvalidBasisException(msg) - - # prefix basis names by function name - bn = ['{}.{}'.format(func.__name__, n) for n in bn] - - acc.append((bf, bn)) - - _data, _names = zip(*acc) - basis_data = np.column_stack(_data) - basis_names = [n for ns in _names for n in ns] - return basis_data, basis_names - - -def polynomial_pitch_basis(part): - """Polynomial pitch basis. - - Returns: - * pitch : the midi pitch of the note - * pitch^2 : the square of the midi pitch - * pitch^3 : the power of 3 of the midi pitch - - """ - - basis_names = ['pitch', 'pitch^2', 'pitch^3'] - max_pitch = 127 - pitches = np.array( - [n.midi_pitch for n in part.notes_tied]).astype(np.float) - W = np.column_stack((pitches / max_pitch, - pitches**2 / max_pitch**2, - pitches**3 / max_pitch**3)) - - return W, basis_names - - -def duration_basis(part): - """Duration basis. - - Returns: - * duration : the duration of the note - - """ - - basis_names = ['duration'] - - nd = np.array([(n.start.t, n.end_tied.t) for n in part.notes_tied]) - bm = part.beat_map - - durations_beat = bm(nd[:, 1]) - bm(nd[:, 0]) - W = durations_beat - W.shape = (-1, 1) - return W, basis_names - -def onset_basis(part): - """Onset basis - - Returns: - * onset : the onset of the note in beats - * score_position : position of the note in the score between 0 (the beginning of the piece) and 1 (the end of the piece) - - TODO: - * rel_position_repetition - """ - basis_names = ['onset', 'score_position'] - - onsets = np.array([n.start.t for n in part.notes_tied]) - bm = part.beat_map - onsets_beat = bm(onsets) - rel_position = normalize(onsets_beat, method='minmax') - - W = np.column_stack((onsets_beat, rel_position)) - - return W, basis_names - -def relative_score_position_basis(part): - W, names = onset_basis(part) - return W[:, 1:], names[1:] - - -def grace_basis(part): - """Grace basis. 
- - Returns: - * grace_note : 1 when the note is a grace note, 0 otherwise - * n_grace : the length of the grace note sequence to which - this note belongs (0 for non-grace notes) - * grace_pos : the (1-based) position of the grace note in - the sequence (0 for non-grace notes) - - """ - - basis_names = ['grace_note', 'n_grace', 'grace_pos'] - - notes = part.notes_tied - W = np.zeros((len(notes), 3)) - for i, n in enumerate(notes): - grace = isinstance(n, score.GraceNote) - if grace: - n_grace = n.grace_seq_len - W[i, 0] = 1 - W[i, 1] = n_grace - W[i, 2] = n_grace - sum(1 for _ in n.iter_grace_seq()) + 1 - - return W, basis_names - - -def loudness_direction_basis(part): - """The loudness directions in part. - - This function returns a varying number of descriptors, depending - on which directions are present. Some directions are grouped - together. For example 'decrescendo' and 'diminuendo' are encoded - together in a descriptor 'loudness_decr'. The descriptor names of - textual directions such as 'adagio' are the verbatim directions. - - Some possible descriptors: - * p : piano - * f : forte - * pp : pianissimo - * loudness_incr : crescendo direction - * loudness_decr : decrescendo or diminuendo direction - - """ - - onsets = np.array([n.start.t for n in part.notes_tied]) - N = len(onsets) - - directions = list(part.iter_all( - score.LoudnessDirection, include_subclasses=True)) - - def to_name(d): - if isinstance(d, score.ConstantLoudnessDirection): - return d.text - elif isinstance(d, score.ImpulsiveLoudnessDirection): - return d.text - elif isinstance(d, score.IncreasingLoudnessDirection): - return 'loudness_incr' - elif isinstance(d, score.DecreasingLoudnessDirection): - return 'loudness_decr' - - basis_by_name = {} - for d in directions: - j, bf = basis_by_name.setdefault(to_name(d), - (len(basis_by_name), np.zeros(N))) - bf += basis_function_activation(d)(onsets) - - W = np.empty((len(onsets), len(basis_by_name))) - names = [None] * len(basis_by_name) - for name, (j, bf) in basis_by_name.items(): - W[:, j] = bf - names[j] = name - - return W, names - - -def tempo_direction_basis(part): - """The tempo directions in part. - - This function returns a varying number of descriptors, depending - on which directions are present. Some directions are grouped - together. For example 'adagio' and 'molto adagio' are encoded - together in a descriptor 'adagio'. 
- - Some possible descriptors: - * adagio : directions like 'adagio', 'molto adagio' - - """ - onsets = np.array([n.start.t for n in part.notes_tied]) - N = len(onsets) - - directions = list(part.iter_all( - score.TempoDirection, include_subclasses=True)) - - def to_name(d): - if isinstance(d, score.ResetTempoDirection): - ref = d.reference_tempo - if ref: - return ref.text - else: - return d.text - elif isinstance(d, score.ConstantTempoDirection): - return d.text - elif isinstance(d, score.IncreasingTempoDirection): - return 'tempo_incr' - elif isinstance(d, score.DecreasingTempoDirection): - return 'tempo_decr' - - basis_by_name = {} - for d in directions: - j, bf = basis_by_name.setdefault(to_name(d), - (len(basis_by_name), np.zeros(N))) - bf += basis_function_activation(d)(onsets) - - W = np.empty((len(onsets), len(basis_by_name))) - names = [None] * len(basis_by_name) - for name, (j, bf) in basis_by_name.items(): - W[:, j] = bf - names[j] = name - - return W, names - - -def articulation_direction_basis(part): - """ - """ - onsets = np.array([n.start.t for n in part.notes_tied]) - N = len(onsets) - - directions = list(part.iter_all( - score.ArticulationDirection, include_subclasses=True)) - - def to_name(d): - return d.text - - basis_by_name = {} - - for d in directions: - - j, bf = basis_by_name.setdefault(to_name(d), - (len(basis_by_name), np.zeros(N))) - bf += basis_function_activation(d)(onsets) - - W = np.empty((len(onsets), len(basis_by_name))) - names = [None] * len(basis_by_name) - - for name, (j, bf) in basis_by_name.items(): - - W[:, j] = bf - names[j] = name - - return W, names - - -def basis_function_activation(direction): - epsilon = 1e-6 - - if isinstance(direction, (score.DynamicLoudnessDirection, - score.DynamicTempoDirection)): - # a dynamic direction will be encoded as a ramp from d.start.t to - # d.end.t, and then a step from d.end.t to the start of the next - # constant direction. - - # There are two potential issues: - - # Issue 1. d.end is None (e.g. just a ritardando without dashes). In this case - if direction.end: - direction_end = direction.end.t - else: - # assume the end of d is the end of the measure: - measure = next(direction.start.iter_prev(score.Measure, eq=True), None) - if measure: - direction_end = measure.start.t - else: - # no measure, unlikely, but not impossible. - direction_end = direction.start.t - - if isinstance(direction, score.TempoDirection): - next_dir = next(direction.start.iter_next( - score.ConstantTempoDirection), None) - if isinstance(direction, score.ArticulationDirection): - next_dir = next(direction.start.iter_next( - score.ConstantArticulationDirection), None) - else: - next_dir = next(direction.start.iter_next( - score.ConstantLoudnessDirection), None) - - if next_dir: - # TODO: what do we do when next_dir is too far away? - sustained_end = next_dir.start.t - else: - # Issue 2. there is no next constant direction. 
In that case the - # basis function will be a ramp with a quarter note ramp - sustained_end = direction_end + direction.start.quarter - - x = [direction.start.t, - direction_end - epsilon, - sustained_end - epsilon] - y = [0, 1, 1] - - elif isinstance(direction, (score.ConstantLoudnessDirection, - score.ConstantArticulationDirection, - score.ConstantTempoDirection)): - x = [direction.start.t - epsilon, - direction.start.t, - direction.end.t - epsilon, - direction.end.t] - y = [0, 1, 1, 0] - - else: # impulsive - x = [direction.start.t - epsilon, - direction.start.t, - direction.start.t + epsilon] - y = [0, 1, 0] - - return interp1d(x, y, bounds_error=False, fill_value=0) - - -def slur_basis(part): - """Slur basis. - - Returns: - * slur_incr : a ramp function that increases from 0 - to 1 over the course of the slur - * slur_decr : a ramp function that decreases from 1 - to 0 over the course of the slur - - """ - names = ['slur_incr', 'slur_decr'] - onsets = np.array([n.start.t for n in part.notes_tied]) - slurs = part.iter_all(score.Slur) - W = np.zeros((len(onsets), 2)) - - for slur in slurs: - if not slur.end: - continue - x = [slur.start.t, slur.end.t] - y_inc = [0, 1] - y_dec = [1, 0] - W[:, 0] += interp1d(x, y_inc, bounds_error=False, fill_value=0)(onsets) - W[:, 1] += interp1d(x, y_dec, bounds_error=False, fill_value=0)(onsets) - - return W, names - - -def articulation_basis(part): - """Articulation basis. - - This basis returns articulation-related note annotations, such as accents, legato, and tenuto. - - Possible descriptors: - * accent : 1 when the note has an annotated accent sign - * legato : 1 when the note has an annotated legato sign - * staccato : 1 when the note has an annotated staccato sign - ... - - """ - names = ['accent', 'strong-accent', 'staccato', 'tenuto', - 'detached-legato', 'staccatissimo', 'spiccato', - 'scoop', 'plop', 'doit', 'falloff', 'breath-mark', - 'caesura', 'stress', 'unstress', 'soft-accent'] - basis_by_name = {} - notes = part.notes_tied - N = len(notes) - for i, n in enumerate(notes): - if n.articulations: - for art in n.articulations: - if art in names: - j, bf = basis_by_name.setdefault( - art, - (len(basis_by_name), np.zeros(N))) - bf[i] = 1 - - M = len(basis_by_name) - W = np.empty((N, M)) - names = [None] * M - - for name, (j, bf) in basis_by_name.items(): - W[:, j] = bf - names[j] = name - - return W, names - -# # for a subset of the articulations do e.g. -# def staccato_basis(part): -# W, names = articulation_basis(part) -# if 'staccato' in names: -# i = names.index('staccato') -# return W[:, i:i + 1], ['staccato'] -# else: -# return np.empty(len(W)), [] - - -def fermata_basis(part): - """Fermata basis. - - Returns: - * fermata : 1 when the note coincides with a fermata sign. - - """ - names = ['fermata'] - onsets = np.array([n.start.t for n in part.notes_tied]) - W = np.zeros((len(onsets), 1)) - for ferm in part.iter_all(score.Fermata): - W[onsets == ferm.start.t, 0] = 1 - return W, names - - -def metrical_basis(part): - """Metrical basis - - This basis encodes the metrical position in the bar. For example - the first beat in a 3/4 meter is encoded in a binary descriptor - 'metrical_3_4_0', the fifth beat in a 6/8 meter as - 'metrical_6_8_4', etc. Any positions that do not fall on a beat - are encoded in a basis suffixed '_weak'. For example a note - starting on the second 8th note in a bar of 4/4 meter will have a - non-zero value in the 'metrical_4_4_weak' descriptor. 
- - """ - notes = part.notes_tied - ts_map = part.time_signature_map - bm = part.beat_map - basis_by_name = {} - eps = 10**-6 - - for i, n in enumerate(notes): - - beats, beat_type = ts_map(n.start.t).astype(int) - measure = next(n.start.iter_prev(score.Measure, eq=True), None) - - if measure: - measure_start = measure.start.t - else: - measure_start = 0 - - pos = bm(n.start.t) - bm(measure_start) - - if pos % 1 < eps: - name = 'metrical_{}_{}_{}'.format(beats, beat_type, int(pos)) - else: - name = 'metrical_{}_{}_weak'.format(beats, beat_type) - - j, bf = basis_by_name.setdefault(name, - (len(basis_by_name), np.zeros(len(notes)))) - bf[i] = 1 - - W = np.empty((len(notes), len(basis_by_name))) - names = [None] * len(basis_by_name) - for name, (j, bf) in basis_by_name.items(): - W[:, j] = bf - names[j] = name - - return W, names - -def metrical_strength_basis(part): - """Metrical strength basis - - This basis encodes the beat phase (relative position of a note within - the measure), as well as metrical strength of common time signatures. - """ - notes = part.notes_tied - ts_map = part.time_signature_map - bm = part.beat_map - - names = ['beat_phase', - 'metrical_strength_downbeat', - 'metrical_strength_secondary', - 'metrical_strength_weak'] - - W = np.zeros((len(notes), len(names))) - for i, n in enumerate(notes): - - beats, beat_type = ts_map(n.start.t).astype(int) - measure = next(n.start.iter_prev(score.Measure, eq=True), None) - - if beats == 4: - # for 4/4 - sec_beat = 2 - elif beats == 6: - # for 6/8 - sec_beat = 3 - elif beats == 12: - # for 12/8 - sec_beat = 6 - else: - sec_beat = None - - if measure: - measure_start = measure.start.t - else: - measure_start = 0 - - pos = bm(n.start.t) - bm(measure_start) - - m_pos = np.mod(pos, beats) - - W[i, 0] = m_pos / beats - - if m_pos == 0: - W[i, 1] = 1 - elif m_pos == sec_beat: - W[i, 2] = 1 - else: - W[i, 3] = 1 - - return W, names - -def time_signature_basis(part): - """TIme Signature basis - This basis encodes the time signature of the note in two sets of one-hot vectors, - a one hot encoding of number of beats and a one hot encoding of beat type - """ - - notes = part.notes_tied - ts_map = part.time_signature_map - possible_beats = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 'other'] - possible_beat_types = [1, 2, 4, 8, 16, 'other'] - W_beats = np.zeros((len(notes), len(possible_beats))) - W_types = np.zeros((len(notes), len(possible_beat_types))) - - names = (['time_signature_num_{0}'.format(b) for b in possible_beats] + - ['time_signature_den_{0}'.format(b) for b in possible_beat_types]) - - for i, n in enumerate(notes): - beats, beat_type = ts_map(n.start.t).astype(int) - - if beats in possible_beats: - W_beats[i, beats - 1] = 1 - else: - W_beats[i, -1] = 1 - - if beat_type in possible_beat_types: - W_types[i, possible_beat_types.index(beat_type)] = 1 - else: - W_types[i, -1] = 1 - - W = np.column_stack((W_beats, W_types)) - - return W, names - -def vertical_neighbor_basis(part): - """Vertical neighbor basis. - - Describes various aspects of simultaneously starting notes. 
- - Returns: - * n_total : - * n_above : - * n_below : - * highest_pitch : - * lowest_pitch : - * pitch_range : - - """ - # the list of descriptors - names = ['n_total', 'n_above', 'n_below', - 'highest_pitch', 'lowest_pitch', 'pitch_range'] - # notes - notes = part.notes_tied - - W = np.empty((len(notes), len(names))) - for i, n in enumerate(part.notes_tied): - neighbors = np.array([n.midi_pitch for n in - n.start.starting_objects[score.Note]]) - max_pitch = np.max(neighbors) - min_pitch = np.min(neighbors) - W[i, 0] = len(neighbors) - W[i, 1] = np.sum(neighbors > n.midi_pitch) - W[i, 2] = np.sum(neighbors < n.midi_pitch) - W[i, 3] = max_pitch - W[i, 4] = min_pitch - W[i, 5] = max_pitch - min_pitch - return W, names - - -def normalize(data, method='minmax'): - """ - Normalize data in one of several ways. - - The available normalization methods are: - - * minmax - Rescale `data` to the range `[0, 1]` by subtracting the minimum - and dividing by the range. If `data` is a 2d array, each column is - rescaled to `[0, 1]`. - - * tanh - Rescale `data` to the interval `(-1, 1)` using `tanh`. Note that - if `data` is non-negative, the output interval will be `[0, 1)`. - - * tanh_unity - Like "soft", but rather than rescaling strictly to the range (-1, - 1), following will hold: - - normalized = normalize(data, method="tanh_unity") - np.where(data==1) == np.where(normalized==1) - - That is, the normalized data will equal one wherever the original data - equals one. The target interval is `(-1/np.tanh(1), 1/np.tanh(1))`. - - Parameters - ---------- - data: ndarray - Data to be normalized - method: {'minmax', 'tanh', 'tanh_unity'}, optional - The normalization method. Defaults to 'minmax'. - - Returns - ------- - ndarray - Normalized copy of the data - """ - - """Normalize the data in `data`. There are several normalization - - """ - if method == 'minmax': - vmin = np.min(data, 0) - vmax = np.max(data, 0) - - if np.isclose(vmin, vmax): - # Return all values as 0 or as 1? 
- return np.zeros_like(data) - else: - return (data - vmin) / (vmax - vmin) - elif method == 'tanh': - return np.tanh(data) - elif method == 'tanh_unity': - return np.tanh(data) / np.tanh(1) - - -# from extra.utils.data_utils import smooth -# from extra.data_handling.sparse_feature_extraction import ( -# scorepart_to_notes, -# notes_to_notecentered_pianoroll, -# notes_to_pianoroll_note_slices, -# ) -# from music_utils.key_id.key_identification import ( -# key_identification, -# key_to_scaledegree, -# fifths_to_key, -# SCALE_DEGREES, -# KEYS) - -# # from extra.data_handling.annotation_tokenizer import tokenizer, TokenizeException - -# class NoteCenteredPianoRollBasis(Basis): -# # lowest_pitch = 21 -# # highest_pitch = 108 -# neighbour_pitches = 36 -# neighbour_beats = 8 -# beat_div = 8 -# names = ['{0}'.format(i) for i in -# range((2 * neighbour_pitches + 1) * (2 * neighbour_beats * beat_div))] - -# @classmethod -# def makeBasis(cls, score_part): -# notes, idx = scorepart_to_notes(score_part) -# W = notes_to_notecentered_pianoroll( -# notes, onset_only=False, -# neighbour_pitches=cls.neighbour_pitches, -# neighbour_beats=cls.neighbour_beats, -# beat_div=cls.beat_div) -# # print('pitch span', r) -# return FeatureBasis(W, cls.make_full_names()) - - -# def scorepart_to_onsetwise_pianoroll(score_part, morphetic_pitch=False, return_ioi=False): -# notes, _ = scorepart_to_notes(score_part, morphetic_pitch) -# start, end = notes[0, 0], notes[-1, 0] -# return notes_to_pianoroll_note_slices(notes, return_ioi=return_ioi) - - -# def zero_mean_pianoroll(X): -# t, p = np.nonzero(X) -# center = 64 -# Z = np.zeros_like(X) -# for i, t_i in enumerate(unique_onset_idx(t)): -# avg_pitch = int(np.round(np.mean(p[t_i]))) -# new_pitches = p[t_i] - avg_pitch + center -# try: -# Z[t[t_i], new_pitches] = 1 -# except IndexError: -# new_pitches[new_pitches < 0 ] = 0 -# new_pitches[new_pitches >= Z.shape[1] ] = Z.shape[1] - 1 -# Z[t[t_i], new_pitches] = 1 -# return Z - -# class SelfSimilarityBasis(Basis): -# _filter_sizes_abs = (5, 10, 20, 50, 100) -# _max_prop = .7 -# _filter_sizes_prop = (.005, .01, .05, .1, .2, .3, .5) -# names = (['abs_{}'.format(x) for x in _filter_sizes_abs] + -# ['prop_{}'.format(x) for x in _filter_sizes_prop] + -# ['centered_abs_{}'.format(x) for x in _filter_sizes_abs] + -# ['centered_prop_{}'.format(x) for x in _filter_sizes_prop]) - -# @classmethod -# def makeBasis(cls, score_part): -# pr = scorepart_to_onsetwise_pianoroll(score_part, morphetic_pitch=True).toarray() -# onsets = np.array([n.start.t for n in score_part.notes]) -# uox = unique_onset_idx(onsets) - -# N = len(onsets) -# # N x 128 -# X_n = pr.T[np.array([x[0] for x in uox])] -# pr = None -# X = np.corrcoef(X_n) -# X[np.isnan(X)] = 0 - -# names = [] -# W = np.empty((N, 0)) - -# W_abs, k_abs = self_sim_conv_absolute(X, K=cls._filter_sizes_abs, max_prop=cls._max_prop) -# if len(k_abs) > 0: -# names.extend(['abs_{}'.format(x) for x in k_abs]) -# W = np.column_stack((W, expand_array(W_abs, uox, N))) - -# W_prop, k_prop = self_sim_conv_proportional(X, K=cls._filter_sizes_prop) -# if len(k_prop) > 0: -# names.extend(['prop_{}'.format(x) for x in k_prop]) -# W = np.column_stack((W, expand_array(W_prop, uox, N))) - -# X_n = zero_mean_pianoroll(X_n) - -# X = np.corrcoef(X_n) -# X[np.isnan(X)] = 0 - -# W_abs, k_abs = self_sim_conv_absolute(X, K=cls._filter_sizes_abs, max_prop=cls._max_prop) -# if len(k_abs) > 0: -# names.extend(['centered_abs_{}'.format(x) for x in k_abs]) -# W = np.column_stack((W, expand_array(W_abs, uox, N))) - -# 
W_prop, k_prop = self_sim_conv_proportional(X, K=cls._filter_sizes_prop) -# if len(k_prop) > 0: -# names.extend(['centered_prop_{}'.format(x) for x in k_prop]) -# W = np.column_stack((W, expand_array(W_prop, uox, N))) - -# return FeatureBasis(normalize(W), cls.make_full_names(names)) - -# def expand_array(x, idx, N): -# """ -# Given an array `x` and a list of grouped indices `idx`, return a new array `y`, -# where the values of `x` are duplicated according to `idx`, such that: - -# y[idx[i]] = x[i], where idx[i] is an array of integers - -# This function is a convenience function to duplicate onsetwise features (`x`) to -# obtain notewise features (`y`). - -# Argument `N` is the length of the output array. - -# Warning: there are no checks that `N` is consistent with `idx`, and that the -# values in `idx` fill all of `y`. - -# For example: let x = [1, 2, 3] and idx = [[0, 1], [2], [3, 4]], (and N = 5, -# redundantly), then y = [1, 1, 2, 3, 3] - -# Parameters -# ---------- -# x: ndarray -# Array with values (can be multidimensional) -# idx: list -# List of index-arrays -# N: int -# Size of the expanded array - -# Returns -# ------- -# ndarray -# Expanded array -# """ - - -# s = tuple([N] + list(x.shape)[1:]) -# y = np.empty(s) -# for v, i in zip(x, idx): -# y[i] = v -# return y - -# class PianorollBasis(Basis): -# names = ['{0}'.format(i) for i in -# range(128)] + ['log2_duration'] - -# @classmethod -# def makeBasis(cls, score_part): -# W, ioi = scorepart_to_onsetwise_pianoroll(score_part, return_ioi=True) -# W = W.T.toarray() -# # print(W.shape, ioi.shape) -# # print(np.unique(np.sort(ioi))) -# assert np.sum(np.sum(W, 1) > 0) == W.shape[0] -# W = np.column_stack((W, np.log2(ioi))) -# return FeatureBasis(soft_normalize(W), cls.make_full_names()) - -# class ExtremePitchBasis(Basis): - -# """ -# This basis computes the highest and lowest pitch at each score position -# Each row in the resulting matrix corresponds to a note in the score and -# contains the highest and lowest pitch of the score position to which -# the note belongs (i.e. for the same extreme pitches will appear for all -# notes that belong to the same score position. - -# highestpitch : highest pitch of each score position -# lowestpitch : lowest pitch of each score position -# """ -# names = ['highestpitch', 'lowestpitch'] - -# @classmethod -# def makeBasis(cls, scorePart): - -# Q = 127.0 -# # Pitches and onsets -# p_o = np.array([(n.midi_pitch, n.start.t) for n in scorePart.notes]) - -# unique_onsets = np.unique(p_o[:, 1]) - -# unique_onset_idxs = [np.where(p_o[:, 1] == u)[0] for u in unique_onsets] - -# pitches = [p_o[ix, 0] for ix in unique_onset_idxs] - -# W = np.zeros((len(p_o), 2)) - -# for u, p in zip(unique_onset_idxs, pitches): -# W[u, 0] = p.max() / Q -# W[u, 1] = p.min() / Q - -# return FeatureBasis(W, cls.make_full_names()) - - -# class VerticalIntervalClassBasis(Basis): -# """ -# Three features describing up to three vertical interval classes -# above the bass, i.e. 
the intervals between the notes of a chord and -# the lowest pitch excluding pitch class repetition and octaves - -# vertical_intervals_{1,2,3} -# """ - -# names = ['vertical_interval_class_1', -# 'vertical_interval_class_2', -# 'vertical_interval_class_3'] - -# @classmethod -# def makeBasis(cls, scorePart): - -# Q = 11.0 -# # Pitches and onsets -# p_o = np.array([(n.midi_pitch, n.start.t) for n in scorePart.notes]) - -# # get unique onsets -# unique_onsets = np.unique(p_o[:, 1]) - -# # get unique_onset_idxs -# unique_onset_idxs = [np.where(p_o[:, 1] == u)[0] for u in unique_onsets] - -# pitches = [p_o[ix, 0] for ix in unique_onset_idxs] - -# W = np.zeros((len(p_o), 3)) - -# for u, p in zip(unique_onset_idxs, pitches): -# # Vertical interval class combination -# pitch_classes = np.unique(np.mod(p, 12)) -# vertical_intervals = pitch_classes - pitch_classes.min() -# vertical_intervals.sort() - -# # Normalize the vintc to lie between 0 and 1 -# W[u, :len(vertical_intervals[slice(1, 4)])] = ( -# vertical_intervals[slice(1, 4)]) / Q - -# return FeatureBasis(W, cls.make_full_names()) - - -# class VerticalNeighborBasis(Basis): - -# """ -# This basis has three members: - -# lower_neighbors: the number of simultaneously starting notes with lower pitches -# upper_neighbors: the number of simultaneously starting notes with higher pitches - -# """ - -# names = ['lower_neighbors', 'upper_neighbors', 'total_neighbors'] - -# @classmethod -# def makeBasis(cls, scorePart): -# t_dict = partition(lambda n: n.start.t, scorePart.notes) -# n_dict = {} -# for k, v in t_dict.items(): -# v.sort(key=attrgetter('midi_pitch')) -# N = len(v) - 1 -# for i, n in enumerate(v): -# n_dict[n] = (i, N - i, N + 1) -# W = np.array([n_dict[n] for n in scorePart.notes]) -# return FeatureBasis(soft_normalize(W), cls.make_full_names()) - - -# class UIOIBasis(Basis): -# """ -# This basis has two members: - -# ioi_prev: the time interval between the current onset (t) with the previous onset (t-1) -# ioi_next: the time interval between (t-2) and (t-3) - - -# """ -# names = ['u_ioi_prev', 'u_ioi_next'] -# @classmethod -# def makeBasis(cls, scorePart): -# onsets = np.array([n.start.t for n in scorePart.notes]) -# u_onset_idx = unique_onset_idx(onsets) -# u_onsets = np.array([onsets[ii[0]] for ii in u_onset_idx]) -# # include offset of last note for computing last 'ioi' -# u_ioi = np.diff(np.r_[u_onsets, scorePart.notes[-1].end.t]) -# u_W = np.column_stack((np.r_[0, u_ioi[:-1]], u_ioi)).astype(np.float) -# W = np.empty((len(onsets), 2)) -# for i, ii in enumerate(u_onset_idx): -# W[ii,:] = u_W[i,:] -# return FeatureBasis(soft_normalize(W, preserve_unity=True), cls.make_full_names()) - -# class IOIBasis(Basis): - -# """ -# This basis has three members: - -# ioi_prev1: the time interval between the current onset (t) with the previous onset (t-1) -# ioi_prev2: the time interval between (t-1) and (t-2) -# ioi_prev3: the time interval between (t-2) and (t-3) - -# Each of these values is 0 in case there are no prior onsets - -# In this basis, the next onset is defined as the next onset that is - -# """ - -# names = ['ioi_prev1', 'ioi_prev2', 'ioi_prev3', -# 'ioi_next1', 'ioi_next2', 'ioi_next3'] - -# @classmethod -# def makeBasis(cls, scorePart): -# t_dict = {} -# for note in scorePart.notes: -# pred1 = note.start.get_prev_of_type(Note) -# if len(pred1) > 1: -# d1 = note.start.t - pred1[0].start.t -# pred2 = pred1[0].start.get_prev_of_type(Note) -# if len(pred2) > 1: -# d2 = pred1[0].start.t - pred2[0].start.t -# pred3 = 
pred2[0].start.get_prev_of_type(Note) -# if len(pred3) > 1: -# d3 = pred2[0].start.t - pred3[0].start.t -# else: -# d3 = 0 -# else: -# d2 = 0 -# d3 = 0 -# else: -# d1 = 0 -# d2 = 0 -# d3 = 0 - -# succ1 = note.start.get_next_of_type(Note) -# if len(succ1) > 1: -# d4 = succ1[0].start.t - note.start.t -# succ2 = succ1[0].start.get_next_of_type(Note) -# if len(succ2) > 1: -# d5 = succ2[0].start.t - succ1[0].start.t -# succ3 = succ2[0].start.get_next_of_type(Note) -# if len(succ3) > 1: -# d6 = succ3[0].start.t - succ2[0].start.t -# else: -# d6 = 0 -# else: -# d5 = 0 -# d6 = 0 -# else: -# d4 = 0 -# d5 = 0 -# d6 = 0 - -# t_dict[note.start] = (d1, d2, d3, d4, d5, d6) -# W = np.array([t_dict[n.start] for n in scorePart.notes]) -# return FeatureBasis(soft_normalize(W, preserve_unity=True), cls.make_full_names()) - -# # @classmethod -# # def makeBasis(cls, scorePart): -# # t_dict = {} -# # for tp in scorePart.timeline.points: -# # if tp.prev: -# # d1 = tp.t - tp.prev.t -# # if tp.prev.prev: -# # d2 = tp.prev.t - tp.prev.prev.t -# # if tp.prev.prev.prev: -# # d3 = tp.prev.prev.t - tp.prev.prev.prev.t -# # else: -# # d3 = 0 -# # else: -# # d2 = 0 -# # else: -# # d1 = 0 -# # d2 = 0 -# # d3 = 0 - -# # if tp.next: -# # d4 = tp.next.t - tp.t -# # if tp.next.next: -# # d5 = tp.next.next.t - tp.next.t -# # if tp.next.next.next: -# # d6 = tp.next.next.next.t - tp.next.next.t -# # else: -# # d6 = 0 -# # else: -# # d5 = 0 -# # else: -# # d4 = 0 -# # d5 = 0 -# # d6 = 0 - -# # t_dict[tp] = (d1, d2, d3, d4, d5, d6) -# # W = np.array([t_dict[n.start] for n in scorePart.notes]) -# # return FeatureBasis(normalize(W), cls.make_full_names()) - - -# class RitardandoBasis(Basis): -# names = ['ritardando'] - -# @classmethod -# def makeBasis(cls, scorePart): -# end = scorePart.timeline.points[0].t -# start = scorePart.timeline.points[-1].t -# W = np.array([n.start.t for n in scorePart.notes], dtype=np.float) -# W = np.exp(((W - start) / (end - start))**100) - 1 -# W.shape = (-1, 1) -# return FeatureBasis(soft_normalize(W, preserve_unity=True), cls.make_full_names()) - - -# class SlurBasis(Basis): -# names = ['slur_step', 'slur_incr', 'slur_decr'] - -# @classmethod -# def makeBasis(cls, scorePart): -# slurs = scorePart.timeline.get_all_of_type(Slur) - -# W = np.zeros((len(scorePart.notes), 3), dtype=np.float32) - -# if len(slurs) > 0: -# ss = np.array([(s.voice, s.start.t, s.end.t) -# for s in slurs -# if (s.start is not None and -# s.end is not None)]) - -# if ss.shape[0] < len(slurs): -# LOGGER.info("Ignoring {0} of {1} slurs for missing start or end" -# .format(len(slurs) - ss.shape[0], len(slurs))) - -# # begin make arch -# onsets = np.array([n.start.t for n in scorePart.notes]) -# first = np.min(onsets) -# last = np.max(onsets) -# eps = 10**-4 - -# for v, start, end in ss: -# tmap = np.array([[min(first, start - eps), 0, 0], -# [start - eps, 0, 0], -# [start, 0, 1], -# [end, 1, 0], -# [end + eps, 0, 0], -# [max(last, end + eps), 0, 0]]) -# incr = interp1d(tmap[:, 0], tmap[:, 1]) -# decr = interp1d(tmap[:, 0], tmap[:, 2]) -# W[:, 1] += incr(onsets) -# W[:, 2] += decr(onsets) - -# start_idx = np.argsort(ss[:, 1]) -# end_idx = np.argsort(ss[:, 2]) - -# ss_start = ss[start_idx,:] -# ss_end = ss[end_idx,:] - -# idx = np.arange(ss.shape[0], dtype=np.int) - -# idx_start = idx[start_idx] -# idx_end = idx[end_idx] - -# ndnv = np.array([(n.start.t, n.voice) for n in scorePart.notes]) - -# start_before = np.searchsorted( -# ss_start[:, 1], ndnv[:, 0], side='right') -# end_after = np.searchsorted(ss_end[:, 2], ndnv[:, 0], 
side='left') - -# for i in range(ndnv.shape[0]): -# spanning = tuple( -# set(idx_start[:start_before[i]]).intersection(set(idx_end[end_after[i]:]))) -# W[i, 0] = 1 if ndnv[i, 1] in ss[spanning, 0] else 0 - -# return FeatureBasis(W, cls.make_full_names()) - - -# class ScoreTimeBasis(Basis): -# names = ['beat'] -# @classmethod -# def makeBasis(cls, scorePart): -# nd = np.array([n.start.t for n in scorePart.notes]) -# bm = scorePart.beat_map -# W = bm(nd) -# W.shape = (-1, 1) -# return FeatureBasis(W, cls.make_full_names()) - -# class RestBasis(Basis): -# names = ['precedes_rest', 'precedes_rest_narrow', 'precedes_rest_mid', 'precedes_rest_wide'] - -# @classmethod -# def makeBasis(cls, scorePart): -# smooth_k = 2 -# smooth_k_mid = 6 -# smooth_k_wide = 10 - -# t_rest = dict((n.start.t, 1 if len(n.end.get_starting_objects_of_type(Note)) == 0 else 0) -# for n in scorePart.notes) - -# t_sorted = sorted(t_rest.keys()) - -# smoothed = np.column_stack(([t_rest[k] for k in t_sorted], -# smooth([t_rest[k] for k in t_sorted], smooth_k), -# smooth([t_rest[k] for k in t_sorted], smooth_k_mid), -# smooth([t_rest[k] for k in t_sorted], smooth_k_wide))) -# rest_smooth = dict((k, x) for k, x in zip(t_sorted, smoothed)) - -# W = np.array([rest_smooth[n.start.t] for n in scorePart.notes]) -# return FeatureBasis(normalize(W), cls.make_full_names()) - - -# class RepeatBasis(Basis): -# names = ['repeat_end', 'repeat_end_short_ramp', 'repeat_end_med_ramp', 'repeat_end_wide_ramp'] - -# @classmethod -# def makeBasis(cls, scorePart): -# smooth_k = 2 -# smooth_k_mid = 6 -# smooth_k_wide = 10 - -# on_repeat = dict((tp.t, 0 if len(tp.get_ending_objects_of_type(Repeat)) == 0 else 1) -# for tp in scorePart.timeline.points) -# on_repeat[scorePart.timeline.points[-1].t] = 1 -# t_sorted = sorted(on_repeat.keys()) - -# smoothed = np.column_stack(( -# [on_repeat[k] for k in t_sorted], -# smooth([on_repeat[k] for k in t_sorted], smooth_k), -# smooth([on_repeat[k] for k in t_sorted], smooth_k_mid), -# smooth([on_repeat[k] for k in t_sorted], smooth_k_wide))) - -# repeat_smooth = dict((k, x) for k, x in zip(t_sorted, smoothed)) - -# W = np.array([repeat_smooth[n.end.t] for n in scorePart.notes]) -# return FeatureBasis(normalize(W), cls.make_full_names()) - - -# class HarmonicBasis(Basis): -# names = KEYS + SCALE_DEGREES - -# @classmethod -# def makeBasis(cls, scorePart): -# tl = scorePart.timeline -# key_sigs = tl.get_all_of_type(KeySignature) -# measures = tl.get_all_of_type(Measure) - -# note_info = np.array([(n.midi_pitch, n.start.t, n.end.t) -# for n in scorePart.notes]) - -# bar_onsets = np.array([m.start.t for m in measures]) - -# key_info = [(ks.fifths, ks.mode, ks.start.t) -# for ks in key_sigs] - -# idx = np.searchsorted(note_info[:, 1], bar_onsets) -# idx_key = np.searchsorted(note_info[:, 1], [x[2] for x in key_info]) - -# key_segments = [] -# for key_notes in np.split(note_info, idx_key)[1:]: -# key_segments.append(key_notes) - -# segments = [] -# for bar_notes in np.split(note_info, idx)[1:]: -# if len(bar_notes) > 0: -# segments.append(bar_notes) - -# key_gt = [] -# for ks, seg in zip(key_info, key_segments): -# key_gt.append((fifths_to_key(ks[0], ks[1], seg), ks[2])) - -# # for segment in segments: -# # print segment -# viterbi_path = key_identification(segments, key_gt, 'temperley') -# # print viterbi path -# key_seq = [] -# scale_degree_sect = [] - -# for ky, segment in zip(viterbi_path, segments): -# # print ky,segment -# for kych in key_gt: - -# try: -# if segment[0, 1] >= kych[1]: -# kyc = kych[0] - -# except: 
-# pass - -# scale_degree_sect += [key_to_scaledegree(ky, kyc)] * len(segment) -# key_seq += [ky] * len(segment) - -# W_key = np.zeros((len(note_info[:, 0]), 24)) -# W_sd = np.zeros((len(note_info[:, 0]), len(SCALE_DEGREES))) -# for ii, ky in enumerate(zip(key_seq, scale_degree_sect)): -# W_key[ii, KEYS.index(ky[0])] = 1 -# W_sd[ii, SCALE_DEGREES.index(ky[1])] = 1 - -# W = np.hstack((W_key, W_sd)) - -# return FeatureBasis(W, cls.make_full_names()) - -# class PredictivePianorollBasis(Basis): - -# # If this class attribute is defined, it will be set by -# # `lbm.utils.basisUtilities.set_derived_data_folder`, which should be called -# # before the makeBasis method is called -# data_folder = None - -# @classmethod -# def makeBasis(cls, scorePart): -# onsets = np.array([n.start.t for n in scorePart.notes]) -# # Initialize matrix of basis functions -# uox = unique_onset_idx(onsets) -# N = len(uox) -# W = None -# names = None -# if cls.data_folder is not None: -# fn = os.path.join(cls.data_folder, -# '{}_hidden.npy'.format(scorePart.piece_name)) -# try: -# W_onset = np.load(fn) -# # print(W_onset.shape, fn, len(uox)) -# W = expand_array(W_onset, uox, len(onsets)) -# if len(W) != len(onsets): -# LOGGER.warning(('Data shape from {} does not coincide with ' -# 'the number of onsets in the score: {} vs {}') -# .format(fn, W.shape, len(onsets))) -# W = None -# else: -# names = ['feature{0:04}'.format(i) for i in range(W.shape[1])] -# except: -# LOGGER.warning('Could not load data from {}'.format(fn)) - - -# else: -# LOGGER.warning('Cannot create PredictivePianorollBasis, because no derived data folder has been specified') - -# if W is None: -# names = [] -# W = np.zeros((len(onsets), 0)) - -# return FeatureBasis(soft_normalize(W), cls.make_full_names(names)) - -# class HarmonicTensionBasis(Basis): -# names = ['key', 'diameter', 'centroid'] - -# # this should be set from outside before makeBasis is called -# data_folder = None - -# @classmethod -# def makeBasis(cls, scorePart): - -# onsets = np.array([n.start.t for n in scorePart.notes]) -# bars = scorePart.timeline.get_all_of_type(Measure) - -# # compute how many outputs Dorien code generates for this piece: - -# # default value used in Dorien's code -# nvis = 4 -# ndivs = (bars[-1].end.t - bars[0].start.t) -# ws = (ndivs / len(bars) ) / nvis -# # print('expected nr of nlines in files', ndivs / ws) - -# start = bars[0].start.t -# end = bars[-1].end.t - -# # Initialize matrix of basis functions -# W = np.zeros((len(onsets), len(cls.names))) - -# if cls.data_folder is not None: -# # Load harmonic tension information from Doriens -# # XmlTensionVisualiser.jar output files -# for i, b_name in enumerate(cls.names): -# fn = os.path.join(cls.data_folder, -# '{}_{}.data'.format(scorePart.piece_name, b_name)) -# try: -# data = np.loadtxt(fn)[:, 1] -# except: -# LOGGER.warning('Could not load data from {}'.format(fn)) -# continue - -# data = np.r_[0, data, 0] -# times = np.arange(start, end, ws) - -# if len(times) == len(data) - 2: -# times = np.r_[start, times + ws / 2., end] -# elif len(times) - 1 == len(data): -# times = np.r_[start, times[1:], end] -# else: -# LOGGER.info('HarmonicTensionBasis expected {} data points from {}, got {}' -# .format(len(times), fn, len(data))) -# times = np.linspace(start, end, len(data)) -# W[:, i] = interp1d(times, data)(onsets) -# else: -# LOGGER.warning('Cannot create HarmonicTensionBasis, because no derived data folder has been specified') - -# return FeatureBasis(soft_normalize(W), cls.make_full_names()) diff --git 
a/basismixer/data.py b/basismixer/data.py index 8cad09d..431ff5c 100644 --- a/basismixer/data.py +++ b/basismixer/data.py @@ -1,158 +1,186 @@ #!/usr/bin/env python import logging -import os +import multiprocessing +import warnings +from multiprocessing import Pool +from pathlib import Path import numpy as np -from torch.utils.data import Dataset, ConcatDataset - +import partitura.musicanalysis from partitura import load_musicxml, load_match -from partitura.score import expand_grace_notes, remove_grace_notes -from basismixer.basisfunctions import make_basis +from partitura.score import expand_grace_notes +from torch.utils.data import Dataset + +from basismixer.performance_codec import get_performance_codec from basismixer.utils import (pair_files, get_unique_onset_idxs, notewise_to_onsetwise) - -from basismixer.performance_codec import get_performance_codec +from .parse_tsv_alignment import load_alignment_from_ASAP LOGGER = logging.getLogger(__name__) +from partitura.score import GraceNote, Note -def make_datasets(model_specs, mxml_folder, match_folder, pieces=None, - quirks=False, gracenotes='remove'): - """Create a dataset for each model specification in a list. - - A model specification is a dictionary with the keys 'onsetwise' - (bool), 'basis_functions' (a list of basis function names), - 'parameter_names' (a list of parameter names) and 'seq_len' (an - integer). For example: - { - 'onsetwise': False, - 'basis_functions': ['polynomial_pitch_basis', 'duration_basis'], - 'parameter_names': ['velocity_trend', 'beat_period'], - 'seq_len': 1 - } +def remove_grace_notes(part): + """Remove all grace notes from a part. - The datasets are created based on pairs of MusicXML files and - match files found in `mxml_folder` and `match_folder` - respectively. + The specified part object will be modified in place. Parameters ---------- - model_specs : list - A list of dictionaries - mxml_folder : str - Path to folder with MusicXML files - match_folder : str - Path to folder with Match files - pieces : list or None, optional - If not None only pieces with a piece name occurring in the list - are included in the datasets - quirks : bool, optional - If True some changes are made to make the function work with - the Magaloff/Zeilinger datasets. Defaults to False. - - Returns - ------- - list - A list of triplets (datasets, input_names, output_names) with - the same length as `model_specs`. `datasets` is a list of - datasets (one per performance), `input_names` and - `output_names` are labels to identify the inputs and outputs, - respectively - - """ + part : Part + The part from which to remove the grace notes - all_targets = list(set([n for model_spec in model_specs - for n in model_spec['parameter_names']])) + """ + for gn in list(part.iter_all(GraceNote)): + for n in list(part.iter_all(Note)): + if n.tie_next == gn: + n.tie_next = None + part.remove(gn) - perf_codec = get_performance_codec(all_targets) - # different subsets of basis functions may be returned for different - # pieces. idx_map maintains a global mapping from basis names to - # indices into the columns of the model inputs. - bf_idx_map = {} - # a list to gather the data from which the dataset will be built.
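The rewritten data.py below factors the per-piece work out of make_datasets into a standalone process_piece function so pieces can be handled in parallel. Because multiprocessing.Pool.map needs a picklable callable, the constant arguments are bound through the small ProcessPiece wrapper class rather than a lambda or closure. A stripped-down sketch of that pattern, with a stand-in work function (process_item and BoundArgs are illustrative, not this module's actual code):

import multiprocessing
from multiprocessing import Pool

def process_item(item, scale):
    # stand-in for process_piece: a picklable top-level function
    return item * scale

class BoundArgs:
    # mirrors ProcessPiece: binds the constant extra arguments while
    # keeping the callable picklable (lambdas and closures are not)
    def __init__(self, args):
        self.args = args

    def __call__(self, item):
        return process_item(item, *self.args)

if __name__ == '__main__':
    with Pool(multiprocessing.cpu_count()) as pool:
        print(pool.map(BoundArgs((10,)), [1, 2, 3]))  # -> [10, 20, 30]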
+def process_piece(piece_performances, perf_codec, all_basis_functions, gracenotes, dataset_name): + piece, performances = piece_performances data = [] - - all_basis_functions = set([n for model_spec in model_specs - for n in model_spec['basis_functions']]) - folders = dict(mxml=mxml_folder, match=match_folder) - - # by_prefix should be used when there are multiple performances - # (assuming the matchfile names consist of the piece name + a - # suffix). When there is only a single performance per piece (like for - # magaloff/zeilinger), we assume musicxml and matchfile have the same - # name (up to the file extension), so we switch by_prefix of in the - # file pairing. In that way files are only paired if they are have - # identical names (up to the extension). - for piece, files in pair_files(folders, by_prefix=not quirks).items(): - if pieces is not None and piece not in pieces: - continue - # load the score - xml_fn = files['mxml'].pop() - LOGGER.info('Processing {}'.format(xml_fn)) - part = load_musicxml(xml_fn) - bm = part.beat_map - - # get indices of the unique onsets - if gracenotes == 'remove': - # Remove grace notes - remove_grace_notes(part) + quirks = False + if dataset_name == 'asap': + name = '/'.join(str(piece).split('asap')[1].split('/')[1:-1]) + else: + name = piece.split('/')[-1].split('.')[0] + #quirks = True + + LOGGER.info('Processing {}'.format(piece)) + + part = load_musicxml(piece) + part = partitura.score.merge_parts(part) + part = partitura.score.unfold_part_maximal(part, update_ids=dataset_name != '4x22') + bm = part.beat_map + + # get indices of the unique onsets + if gracenotes == 'remove': + # Remove grace notes + remove_grace_notes(part) + else: + # expand grace note durations (necessary for correct computation of + # targets) + expand_grace_notes(part) + basis, bf_names = partitura.musicanalysis.make_note_feats(part, list(all_basis_functions)) + + nid_dict = dict((n.id, i) for i, n in enumerate(part.notes_tied)) + + for performance in performances: + if dataset_name == 'asap': + alignment = load_alignment_from_ASAP(performance) + ppart = partitura.load_performance_midi(str(performance).split("_note_alignments/")[0] + ".mid") else: - # expand grace note durations (necessary for correct computation of - # targets) - expand_grace_notes(part) + ppart, alignment = load_match(performance, first_note_at_zero=True) + + #if quirks: todo: check if quirks are really needed + # for n in alignment: + # if n['label'] == 'match': + # n['score_id'] = n['score_id'].split('-')[0] + + assert len(ppart.performedparts) == 1 + ppart = ppart.performedparts[0] + + # compute the targets + targets, snote_ids = perf_codec.encode(part, ppart, alignment) + + matched_subset_idxs = np.array([nid_dict[nid] for nid in snote_ids]) + basis_matched = basis[matched_subset_idxs] + + score_onsets = bm([n.start.t for n in part.notes_tied])[matched_subset_idxs] + unique_onset_idxs = get_unique_onset_idxs(score_onsets) + + i = -2 if dataset_name == 'asap' else -1 + + performance_name = str(performance).split('/')[i] - # compute the basis functions - basis, bf_names = make_basis(part, all_basis_functions) + data.append((basis_matched, bf_names, targets, unique_onset_idxs, name, performance_name)) + return data - # map the basis names returned for this piece to their global - # indices - bf_idx = np.array([bf_idx_map.setdefault(name, len(bf_idx_map)) - for i, name in enumerate(bf_names)]) +class ProcessPiece: + def __init__(self, args): + self.args = args - # a dictionary from note id to index. 
We need this to select the - # subset of rows in the `basis` array that have a matching entry in - # the targets. - nid_dict = dict((n.id, i) for i, n in enumerate(part.notes_tied)) + def __call__(self, piece): + return process_piece(piece, *self.args) - for match in files['match']: - # if not '_p01' in match: - # continue - name = os.path.splitext(os.path.basename(match))[0] - - LOGGER.info('Processing {}'.format(match)) +def filter_blocklist(pieces): + blocklist = ['Liszt/Sonata', ] + pieces_filtered = [] + for p in pieces: + flag = True + for b in blocklist: + if b in str(p): + flag = False + if flag: + pieces_filtered.append(p) + print(f"filtered out {len(pieces) - len(pieces_filtered)} pieces!") + return pieces_filtered - # load the performed part and the alignment from the match file - ppart, alignment = load_match(match, first_note_at_zero=True) - if quirks: - for n in alignment: - if n['label'] == 'match': - n['score_id'] = n['score_id'].split('-')[0] +def make_datasets(model_specs, root_folder, dataset_name, gracenotes='remove', processes=0): + assert dataset_name in ['4x22', 'magaloff', 'asap'] - # compute the targets - targets, snote_ids = perf_codec.encode(part, ppart, alignment) - + quirks = dataset_name == 'magaloff' - matched_subset_idxs = np.array([nid_dict[nid] for nid in snote_ids]) - basis_matched = basis[matched_subset_idxs] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + all_targets = list(set([n for model_spec in model_specs + for n in model_spec['parameter_names']])) - score_onsets = bm([n.start.t for n in part.notes_tied])[matched_subset_idxs] - unique_onset_idxs = get_unique_onset_idxs(score_onsets) + perf_codec = get_performance_codec(all_targets) - data.append((basis_matched, bf_idx, targets, unique_onset_idxs, name)) - - return piece_data_to_datasets(data, bf_idx_map, model_specs) + bf_idx_map = {} + + all_basis_functions = set([n for model_spec in model_specs + for n in model_spec['basis_functions']]) + + if dataset_name == 'asap':#todo: fix loading of Liszt/Sonata + assert 'asap' in root_folder.split('/')[-1], 'Root folder name must contain "asap"' + pieces = list(Path(root_folder).rglob("*/xml_score.musicxml")) + pieces = filter_blocklist(pieces) + performances = [list(Path(piece).parent.glob("*_note_alignments/note_alignment.tsv")) for piece in pieces] + piece_performances = zip(pieces, performances) + else: + mxml_folder = root_folder + ('xml' if dataset_name == 'magaloff' else 'musicxml') + match_folder = root_folder + 'match' + folders = dict(mxml=mxml_folder, match=match_folder) + paired_files = pair_files(folders, by_prefix=not quirks) + piece_performances = []#[(pf['mxml'][0], list(pf['match'])) for pf in paired_files] + for pf in paired_files.items(): + if 'chopin_op35_Mv3' in pf[0]:#todo: repair loading, do not filter... 
+ continue + piece_performances.append((list(pf[1]['mxml'])[0], list(pf[1]['match']))) + + if processes <= 0: + processes = multiprocessing.cpu_count() + + if processes > 1: + pool = Pool(processes) + pieces = list(pool.map(ProcessPiece((perf_codec, all_basis_functions, gracenotes, dataset_name)), piece_performances)) + else: + pieces = [process_piece(p, perf_codec, all_basis_functions, gracenotes, dataset_name) for p in piece_performances] + pieces = [list(i) for sublist in pieces for i in sublist] + + for piece in pieces: + bf_idx = np.array([bf_idx_map.setdefault(name, len(bf_idx_map)) + for i, name in enumerate(piece[1])]) + piece[1] = bf_idx + + data = [tuple(l) for l in pieces] + + return piece_data_to_datasets(data, bf_idx_map, model_specs) def piece_data_to_datasets(data, bf_idx_map, model_specs): # total number of basis functions in the dataset - #n_basis = len(bf_idx_map) + # n_basis = len(bf_idx_map) idx_bf_map = dict((v, k) for k, v in bf_idx_map.items()) # print(bf_idx_map) # print(bf_idx_inv_map) @@ -165,7 +193,7 @@ def piece_data_to_datasets(data, bf_idx_map, model_specs): output_names_per_model = [] for m_spec in model_specs: # the global indices of the basis functions that this model needs - model_idx = np.concatenate([np.where(input_basis==n)[0] + model_idx = np.concatenate([np.where(input_basis == n)[0] for n in m_spec['basis_functions']]) # trg_idx = np.array([perf_codec.parameter_names.index(n) for n in m_spec['targets']]) n_basis = len(model_idx) @@ -176,33 +204,32 @@ def piece_data_to_datasets(data, bf_idx_map, model_specs): m_datasets = [] m_input_names = [] - for bf, idx, targets, uox, name in data: + for bf, idx, targets, uox, name, perf_name in data: # idx: the global indices that this piece has # the subset of basisfunctions that this model is interested in: useful = np.isin(idx, model_idx) # idx mapped to the subset of basisfunctions for this model - model_idx_subset = np.array([np.where(model_idx==i)[0][0] + model_idx_subset = np.array([np.where(model_idx == i)[0][0] for i in idx[useful]]) # select only the required bfs bf = bf[:, useful] # select only the required targets targets = np.array([targets[n] for n in m_spec['parameter_names']]).T - - if m_spec['onsetwise']: + if m_spec['onsetwise']: bf = notewise_to_onsetwise(bf, uox) targets = notewise_to_onsetwise(targets, uox) - - ds = BasisMixerDataSet(bf, model_idx_subset, n_basis, - targets, m_spec['seq_len'], name) - - m_datasets.append(ds) + ds = BasisMixerDataSet(bf, model_idx_subset, n_basis, targets, + input_names_per_model[-1], output_names_per_model[-1], + m_spec['seq_len'], name, perf_name) + + m_datasets.append(ds) dataset_per_model.append(m_datasets) - + return zip(dataset_per_model, input_names_per_model, output_names_per_model) @@ -260,19 +287,22 @@ class BasisMixerDataSet(Dataset): See Parameters Section. 
""" - def __init__(self, basis, idx, n_basis, targets, seq_len=1, name=None): + + def __init__(self, basis, idx, n_basis, targets, input_names, output_names, seq_len=1, name=None, perf_name=None): self.basis = basis self.idx = idx self.n_basis = n_basis self.targets = targets self.seq_len = seq_len self.name = name + self.perf_name = perf_name + self.input_names = input_names + self.output_names = output_names @property def piecewise(self): return self.seq_len == -1 - def __getitem__(self, i): if self.piecewise: return self._get_item_piecewise(i) diff --git a/basismixer/helper/__init__.py b/basismixer/helper/__init__.py new file mode 100644 index 0000000..a1e402a --- /dev/null +++ b/basismixer/helper/__init__.py @@ -0,0 +1,4 @@ +from .visualization import performance_player, show_score +from .data import init_dataset +from .rendering import load_model, compute_basis_from_xml, render_midi +from .plotting import plot_basis diff --git a/basismixer/helper/data.py b/basismixer/helper/data.py new file mode 100644 index 0000000..59b0b9e --- /dev/null +++ b/basismixer/helper/data.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python + +import os +import json +import argparse +import tarfile +import io +from urllib.request import urlopen +import urllib +import re +import warnings + +from IPython.display import display, HTML, Audio, update_display +import ipywidgets as widgets +import appdirs + +from basismixer.utils import pair_files + +TIMEOUT = 2 +REPO_NAME = 'vienna4x22_rematched' +DATASET_BRANCH = 'master' +OWNER = 'OFAI' +DATASET_URL = 'https://api.github.com/repos/{}/{}/tarball/{}'.format(OWNER, REPO_NAME, DATASET_BRANCH) + +# oggs will be downloaded from here +OGG_URL_BASE = 'https://spocs.duckdns.org/vienna_4x22/' + +TMP_DIR = appdirs.user_cache_dir('basismixer') +CFG_FILE = os.path.join(TMP_DIR, 'cache.json') +CFG = None +# DATASET_DIR will be set to the path of our data +DATASET_DIR = None +PIECES = () +PERFORMERS = () +SCORE_PERFORMANCE_PAIRS = None + +def load_cfg(): + global CFG + if os.path.exists(CFG_FILE): + with open(CFG_FILE) as f: + CFG = json.load(f) + else: + CFG = {'last_dataset_dir': None} + +def save_cfg(): + with open(CFG_FILE, 'w') as f: + json.dump(CFG, f) + +def get_datasetdir(): + """Get the SHA of the latest commit and return the corresponding + datast directory path. 
+ + """ + commit_url = ('https://api.github.com/repos/{}/{}/commits/{}' + .format(OWNER, REPO_NAME, DATASET_BRANCH)) + try: + + with urlopen(commit_url, timeout=TIMEOUT) as response: + commit = json.load(response) + repo_dirname = '{}-{}-{}'.format(OWNER, REPO_NAME, commit['sha'][:7]) + return os.path.join(TMP_DIR, repo_dirname) + + except urllib.error.URLError as e: + # warnings.warn('{} (url: {})'.format(e, commit_url)) + return CFG.get('last_dataset_dir', None) + except Exception as e: + # warnings.warn('{} (url: {})'.format(e, commit_url)) + return CFG.get('last_dataset_dir', None) + + +def init_dataset(): + global DATASET_DIR, PIECES, PERFORMERS, SCORE_PERFORMANCE_PAIRS + + load_cfg() + + status = widgets.Output() + display(status) + status.clear_output() + + DATASET_DIR = get_datasetdir() + + if DATASET_DIR is None: + status.append_stdout('No internet connection?\n') + + elif os.path.exists(DATASET_DIR): + + status.append_stdout('Vienna 4x22 Corpus already downloaded.\n') + status.append_stdout('Data is in {}'.format(DATASET_DIR)) + + else: + status.append_stdout('Downloading Vienna 4x22 Corpus...') + try: + try: + urldata = urlopen(DATASET_URL).read() + except urllib.error.URLError as e: + # warnings.warn('{} (url: {})'.format(e, DATASET_URL)) + status.append_stdout('error. No internet connection?\n') + return + + with tarfile.open(fileobj=io.BytesIO(urldata)) as archive: + folder = next(iter(archive.getnames()), None) + archive.extractall(TMP_DIR) + if folder: + DATASET_DIR = os.path.join(TMP_DIR, folder) + CFG['last_dataset_dir'] = DATASET_DIR + save_cfg() + # assert DATASET_DIR == os.path.join(TMP_DIR, folder) + + except Exception as e: + status.append_stdout('\nError: {}'.format(e)) + return None + status.append_stdout('done\nData is in {}'.format(DATASET_DIR)) + + if DATASET_DIR is None: + return None + + folders = dict(musicxml=os.path.join(DATASET_DIR, 'musicxml'), + match=os.path.join(DATASET_DIR, 'match')) + + SCORE_PERFORMANCE_PAIRS = [] + paired_files = pair_files(folders) + pieces = sorted(paired_files.keys()) + for piece in pieces: + xml_fn = paired_files[piece]['musicxml'].pop() + for match_fn in sorted(paired_files[piece]['match']): + SCORE_PERFORMANCE_PAIRS.append((xml_fn, match_fn)) + + fn_pat = re.compile('(.*)_(p[0-9][0-9])\.match') + match_files = os.listdir(os.path.join(DATASET_DIR, 'match')) + pieces, performers = zip(*[m.groups() for m in [fn_pat.match(fn) + for fn in match_files] + if m]) + PIECES = sorted(set(pieces)) + PERFORMERS = sorted(set(performers)) + + +if __name__ == '__main__': + init_dataset() diff --git a/basismixer/helper/plotting.py b/basismixer/helper/plotting.py new file mode 100644 index 0000000..9119e9e --- /dev/null +++ b/basismixer/helper/plotting.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +import numpy as np +# import argparse +import matplotlib.pyplot as plt +from basismixer.helper.visualization import make_plot + +def plot_basis(basis, names, onsets=None, title=None): + n_basis = basis.shape[1] + + if onsets is None: + x = np.arange(len(basis)) + else: + x = onsets + + w = len(x)/30 + h = n_basis + + fig, axs = plt.subplots(n_basis, sharex=True, + gridspec_kw={'hspace': 0}) + if n_basis == 1: + axs = [axs] + + fig.set_size_inches(w, h) + + if title: + fig.suptitle(title) + + for i, name in enumerate(names): + axs[i].fill_between(x, 0, basis[:, i], label=name) + axs[i].legend(frameon=False, loc='upper left') + + fig.tight_layout() + + if title: + fig.subplots_adjust(top=0.95) + + # fig.savefig(out_fn) + +def 
plot_predictions_and_targets(predictions, targets): + param_names = predictions.dtype.names + n_params = len(param_names) + fig, axs = plt.subplots(n_params, sharex=True) + + fig.set_size_inches(len(predictions) / 30, n_params) + for i, pn in enumerate(param_names): + axs[i].plot(predictions[pn], color='firebrick', + label='predictions') + if targets is not None: + axs[i].plot(targets[:, i], color='blue', label='targets') + axs[i].set_title(pn) + axs[i].legend(frameon=False, loc='upper left') + + fig.tight_layout() + + +def plot_predictions(predictions, onsets=None, + param_names=None): + + if param_names is None: + param_names = predictions.dtype.names + fig, axs = plt.subplots(len(param_names), + sharex=True, + gridspec_kw={'hspace': 0.15}) + plt.subplots_adjust(left=0.07, right=0.99, top=.99, bottom=0.1) + + make_plot(fig, axs, predictions[param_names], onsets) + + + + +# def main(): +# parser = argparse.ArgumentParser(description="Do something") +# parser.add_argument("file", help="some file") +# args = parser.parse_args() + + +# if __name__ == '__main__': +# main() diff --git a/basismixer/helper/predictions.py b/basismixer/helper/predictions.py new file mode 100644 index 0000000..9d7c96d --- /dev/null +++ b/basismixer/helper/predictions.py @@ -0,0 +1,133 @@ +import json +import logging +import os + +import numpy as np +import torch +from torch.utils.data import DataLoader, ConcatDataset +# from torch.utils.data.sampler import SubsetRandomSampler + +from partitura.utils import partition +from basismixer.predictive_models import (construct_model as c_model, + SupervisedTrainer, + MSELoss) +from basismixer.utils import load_pyc_bz, save_pyc_bz + +logging.basicConfig(level=logging.INFO) +LOGGER = logging.getLogger(__name__) + +RNG = np.random.RandomState(1984) + +def construct_model(config, in_names, out_names, out_dir): + model_cfg = config['model'].copy() + model_cfg['args']['input_names'] = in_names + model_cfg['args']['input_size'] = len(in_names) + model_cfg['args']['output_names'] = out_names + model_cfg['args']['output_size'] = len(out_names) + model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise' + model_name = ('-'.join(out_names) + + '-' + ('onsetwise' if config['onsetwise'] else 'notewise')) + model_out_dir = os.path.join(out_dir, model_name) + if not os.path.exists(model_out_dir): + os.mkdir(model_out_dir) + # save model config for later saving model + config_out = os.path.join(model_out_dir, 'config.json') + LOGGER.info('Saving config in {0}'.format(config_out)) + json.dump(jsonize_dict(model_cfg), + open(config_out, 'w'), + indent=2) + model = c_model(model_cfg) + + return model, model_out_dir + +def setup_output_directory(out_dir='/tmp/trained_models'): + if not os.path.exists(out_dir): + os.mkdir(out_dir) + return out_dir + +def jsonize_dict(input_dict): + out_dict = dict() + for k, v in input_dict.items(): + if isinstance(v, np.ndarray): + out_dict[k] = v.tolist() + elif isinstance(v, dict): + out_dict[k] = jsonize_dict(v) + else: + out_dict[k] = v + return out_dict + +def split_datasets_by_piece(datasets, test_size=0.2, valid_size=0.2): + + by_piece = partition(lambda d: '_'.join(d.name.split('_')[:-1]), datasets) + pieces = list(by_piece.keys()) + RNG.shuffle(pieces) + + n_test = max(1, int(np.round(test_size*len(pieces)))) + n_valid = max(1, int(np.round(valid_size*len(pieces)))) + n_train = len(pieces) - n_test - n_valid + + if n_train < 1: + raise Exception('Not enough pieces to split datasets according ' + 'to the specified 
test/validation proportions') + + test_pieces = pieces[:n_test] + valid_pieces = pieces[n_test:n_test+n_valid] + train_pieces = pieces[n_test+n_valid:] + + test_set = [d for pd in [by_piece[p] for p in test_pieces] for d in pd] + valid_set = [d for pd in [by_piece[p] for p in valid_pieces] for d in pd] + train_set = [d for pd in [by_piece[p] for p in train_pieces] for d in pd] + + return (ConcatDataset(train_set), + ConcatDataset(valid_set), + ConcatDataset(test_set)) + + +def split_datasets(datasets, test_size=0.2, valid_size=0.2): + + n_pieces = len(datasets) + + dataset_idx = np.arange(n_pieces) + RNG.shuffle(dataset_idx) + len_test = int(n_pieces * test_size) + len_valid = np.maximum(int((n_pieces - len_test) * valid_size), 1) + + test_idxs = dataset_idx[:len_test] + valid_idxs = dataset_idx[len_test:len_test + len_valid] + train_idxs = dataset_idx[len_test + len_valid:] + + return (ConcatDataset([datasets[i] for i in train_idxs]), + ConcatDataset([datasets[i] for i in valid_idxs]), + ConcatDataset([datasets[i] for i in test_idxs])) + + + + +def train_model(model, train_set, valid_set, + config, out_dir): + batch_size = config['train_args'].pop('batch_size') + + #### Create train and validation data loaders ##### + train_loader = DataLoader(train_set, + batch_size=batch_size, + shuffle=True) + valid_loader = DataLoader(valid_set, + batch_size=batch_size, + shuffle=False) + + loss = MSELoss() + + ### Construct the optimizer #### + optim_name, optim_args = config['train_args']['optimizer'] + optim = getattr(torch.optim, optim_name) + config['train_args']['optimizer'] = optim(model.parameters(), **optim_args) + train_args = config['train_args'] + train_args.pop('seq_len', None) + trainer = SupervisedTrainer(model=model, + train_loss=loss, + valid_loss=loss, + train_dataloader=train_loader, + valid_dataloader=valid_loader, + out_dir=out_dir, + **config['train_args']) + trainer.train() diff --git a/basismixer/helper/rendering.py b/basismixer/helper/rendering.py new file mode 100644 index 0000000..52563eb --- /dev/null +++ b/basismixer/helper/rendering.py @@ -0,0 +1,140 @@ +import json +import os + +import torch +import numpy as np +import subprocess +import soundfile +import tempfile +import logging + +from IPython.display import display, Audio + +from partitura import save_performance_midi, load_musicxml, load_score_midi +from partitura.score import expand_grace_notes, unfold_part_maximal +from basismixer.predictive_models import FullPredictiveModel, construct_model +from basismixer.performance_codec import get_performance_codec +from partitura.musicanalysis import make_note_feats + +from basismixer.helper.predictions import setup_output_directory + +LOGGER = logging.getLogger(__name__) + +def path_to_trained_models(path=setup_output_directory()): + if not os.path.exists(path): + print('Models not found! Using sample models') + path = './sample_data/models' + return path + + +def render_midi(midi_fn): + + with tempfile.NamedTemporaryFile() as out_file: + cmd = ['timidity', '-E', 'F', 'reverb=0', 'F', 'chorus=0', + '--output-mono', '-Ov', '-o', out_file.name, midi_fn] + try: + ps = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if ps.returncode != 0: + LOGGER.error('Command {} failed with code {} (stderr: {})' + .format(cmd, ps.returncode, ps.stderr.decode('UTF8'))) + return False + except FileNotFoundError as f: + LOGGER.error('Executing "{}" returned {}.' 
+ .format(' '.join(cmd), f)) + return False + data, fs = soundfile.read(out_file.name) + aw = display(Audio(data=data, rate=fs, autoplay=True), display_id=True) + return aw + + +def load_model(models_dir): + models = [] + for f in os.listdir(models_dir): + path = os.path.join(models_dir, f) + if os.path.isdir(path): + model_config = json.load(open(os.path.join(path, 'config.json'))) + params = torch.load(os.path.join(path, 'best_model.pth'), + map_location=torch.device('cpu'))['state_dict'] + + model = construct_model(model_config, params) + models.append(model) + + + + output_names = list(set([name for out_name in [m.output_names for m in models] for name in out_name])) + input_names = list(set([name for in_name in [m.input_names for m in models] for name in in_name])) + input_names.sort() + output_names.sort() + + default_values = dict( + velocity_trend=64, + velocity_dev=0, + beat_period_standardized=0, + timing=0, + articulation_log=0, + beat_period_mean=0.5, + beat_period_std=0.1) + all_output_names = list(default_values.keys()) + full_model = FullPredictiveModel(models, input_names, + all_output_names, default_values) + + not_in_model_names = set(all_output_names).difference(output_names) + + print('Trained models include the following parameters:\n' + + '\n'.join(output_names) + '\n\n' + 'The following parameters will use default values:\n'+ + '\n' + '\n'.join(['{0}:{1:.2f}'.format(k, default_values[k]) + for k in not_in_model_names])) + + + return full_model, output_names + +def sanitize_performed_part(ppart): + """Avoid negative durations in notes. + + """ + for n in ppart.notes: + + if n['note_off'] < n['note_on']: + n['note_off'] = n['note_on'] + + if n['sound_off'] < n['note_off']: + n['sound_off'] = n['note_off'] + + +def post_process_predictions(predictions): + max_articulation = 1.5 + max_bps = 1 + max_timing = 0.2 + predictions['articulation_log'] = np.clip(predictions['articulation_log'], + -max_articulation, max_articulation) + predictions['velocity_dev'] = np.clip(predictions['velocity_dev'], 0, 0.8) + predictions['beat_period_standardized'] = np.clip(predictions['beat_period_standardized'], + -max_bps, max_bps) + predictions['timing'] = np.clip(predictions['timing'], + -max_timing, max_timing) + predictions['velocity_trend'][predictions['velocity_trend'] > 0.8] = 0.8 + + + + +def compute_basis_from_xml(xml_fn, input_names): + # Load MusicXML file + part = load_musicxml(xml_fn, force_note_ids=True) + assert len(part.parts) == 1 + part = part.parts[0] + expand_grace_notes(part) + part = unfold_part_maximal(part) + + # Compute basis functions + _basis, bf_names = make_note_feats(part, list(set([bf.split('.')[0] for bf in input_names]))) + basis = np.zeros((len(_basis), len(input_names))) + for i, n in enumerate(input_names): + try: + ix = bf_names.index(n) + except ValueError: + continue + basis[:, i] = _basis[:, ix] + + return basis, part + diff --git a/basismixer/helper/visualization.py b/basismixer/helper/visualization.py new file mode 100644 index 0000000..3948eb7 --- /dev/null +++ b/basismixer/helper/visualization.py @@ -0,0 +1,310 @@ +#!/usr/bin/env ipython + +import threading +from functools import partial +import time +import io +import os +import logging +from urllib.request import urlopen + +from IPython.display import display, HTML, Audio, update_display, Image +# from ipywidgets import interact, interactive, fixed +import ipywidgets as widgets +import matplotlib.pyplot as plt +import numpy as np +from scipy.interpolate import interp1d +import soundfile + 
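+# Typical notebook usage of this module (an illustrative sketch, not called by
+# the module itself; it assumes the dataset helpers have populated
+# basismixer.helper.data via init_dataset()):
+#
+#     from basismixer.helper import init_dataset, performance_player, show_score
+#     init_dataset()           # download/cache the Vienna 4x22 corpus
+#     show_score(0)            # display the engraved score of the first piece
+#     performance_player()     # interactive piece/performer browser with audio
+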
+import partitura
+# import partitura.score as score
+from partitura.utils import partition
+import basismixer
+import basismixer.performance_codec as pc
+# 'data' holds DATASET_DIR, PIECES and PERFORMERS, populated by init_dataset()
+from basismixer.helper import data
+
+LOGGER = logging.getLogger(__name__)
+
+OGG_URL_BASE = 'https://basismixer.cp.jku.at/static/vienna4x22/'
+# PERF_CODEC = pc.PerformanceCodec(pc.TimeCodec(), pc.NotewiseDynamicsCodec())
+PERF_CODEC = pc.PerformanceCodec(pc.TimeCodec(normalization='beat_period_standardized'),
+                                 pc.OnsetwiseDecompositionDynamicsCodec())
+
+plt.rcParams.update({'font.size': 8})
+
+def show_score(piece):
+    if isinstance(piece, int):
+        piece = data.PIECES[piece]
+    display(Image(os.path.join(data.DATASET_DIR, 'png', '{}.png'.format(piece))))
+
+
+def load_performance_audio(piece, performer):
+    url = '{}{}_{}.ogg'.format(OGG_URL_BASE, piece, performer)
+    try:
+        audio, fs = soundfile.read(io.BytesIO(urlopen(url).read()), always_2d=True)
+        audio = audio.mean(1)
+        return audio, fs
+    except Exception:
+        # no audio available (or no connection); callers handle (None, None)
+        return None, None
+
+
+def get_performance_info(piece, performer):
+    assert data.DATASET_DIR
+    musicxml_fn = os.path.join(data.DATASET_DIR, 'musicxml', '{}.musicxml'.format(piece))
+    match_fn = os.path.join(data.DATASET_DIR, 'match', '{}_{}.match'.format(piece, performer))
+
+    part = partitura.load_musicxml(musicxml_fn)
+
+    ppart, alignment = partitura.load_match(match_fn, first_note_at_zero=True)
+    return part, ppart, alignment
+
+
+def show_performance(piece, performer, fig, axs, keep_zoom):
+    part, ppart, alignment = get_performance_info(piece, performer)
+    targets, snote_ids = PERF_CODEC.encode(part, ppart, alignment)
+
+    # we convert to f8 to avoid numerical problems when computing means
+    dtype = [(n, 'f8') for n in targets.dtype.names]
+    targets = targets.astype(dtype)
+
+    part_by_id = dict((n.id, n) for n in part.notes_tied)
+    ppart_by_id = dict((n['id'], n) for n in ppart.notes)
+    s_to_p_id = dict((a['score_id'], a['performance_id'])
+                     for a in alignment if a['label'] == 'match')
+    s_notes = [part_by_id[n] for n in snote_ids]
+    p_notes = [ppart_by_id[s_to_p_id[n]] for n in snote_ids]
+
+    bm = part.beat_map
+    s_onsets = bm([n.start.t for n in s_notes])
+    p_onsets = np.array([n['note_on'] for n in p_notes])
+    measure_times = np.array([(m.start.t, '{}'.format(m.number)) for m in
+                              part.iter_all(partitura.score.Measure)],
+                             dtype=[('t', 'f4'), ('label', 'U100')])
+
+    measure_times['t'] = bm(measure_times['t'])
+
+    make_plot(fig, axs, targets, onsets=s_onsets, xlabel='Measure number',
+              xticks=measure_times, keep_zoom=keep_zoom)  # , title='{} {}'.format(piece, performer)
+
+    s_times = np.r_[s_onsets, s_notes[-1].end.t]
+    p_times = np.r_[p_onsets, p_notes[-1]['note_off']]
+    # score_perf_map = interp1d(s_onsets, p_onsets, bounds_error=False, fill_value='extrapolate')
+    score_perf_map = interp1d(s_times, p_times, bounds_error=False, fill_value=(p_times[0], p_times[-1]))
+
+    return score_perf_map
+
+
+def make_plot(fig, axs, targets, onsets=None, xticks=None, title=None,
+              xlabel=None, start=None, end=None, keep_zoom=False):
+    names = targets.dtype.names
+
+    xlims = []
+    ylims = []
+    for ax in axs:
+        if keep_zoom:
+            xlims.append(list(ax.get_xlim()))
+            ylims.append(list(ax.get_ylim()))
+        ax.clear()
+
+    n_targets = len(names)
+
+    if onsets is None:
+        x = np.arange(len(targets))
+    else:
+        x = onsets
+
+    w = len(x)/30
+    h = n_targets
+
+    if end is not None:
+        idx = x < end
+        x = x[idx]
+        targets = targets[idx]
+
+    if start is not None:
+        idx = x >= start
+        x = x[idx]
+        targets = targets[idx]
+
+    if n_targets == 1:
+        axs = [axs]
+
+    # fig.set_size_inches(w, h)
+
+    if title:
fig.suptitle(title) + + by_onset = partition(lambda ix: ix[1], enumerate(x)) + for k, v in by_onset.items(): + by_onset[k] = np.array([i for i, _ in v]) + + for i, name in enumerate(names): + target = targets[name] + targets[np.isnan(target)] = 0 + + axs[i].plot(x, target, '.', label=name) + + if xticks is not None: + axs[i].set_xticks(xticks['t']) + axs[i].set_xticklabels(xticks['label']) + axs[i].xaxis.grid() + + tt = [] + vv = [] + for t, v in by_onset.items(): + tt.append(t) + vv.append(np.mean(target[v])) + + # axs[i].plot(tt, vv, label='{} (mean)'.format(name)) + axs[i].plot(tt, vv) + + axs[i].legend(frameon=False, loc=2) + + if keep_zoom: + axs[0].set_xlim(xlims[0]) + for xlim, ylim, ax in zip(xlims, ylims, axs): + ax.set_ylim(ylim) + + return fig, axs + + +def performance_player(): + status = widgets.Output() + piece_dd = widgets.Dropdown(options=data.PIECES, description='Piece:') + performer_dd = widgets.Dropdown(options=data.PERFORMERS, description='Performer:') + keep_lims_chbox = widgets.Checkbox(value=False, description='Keep zoom') + reset_lims = widgets.Button(description='Zoom to fit', + button_style='', # 'success', 'info', 'warning', 'danger' or '' + tooltip='Zoom to fit', + icon='check' + ) + + + if data.PIECES and data.PERFORMERS: + current_performance = [data.PIECES[0], data.PERFORMERS[0]] + else: + current_performance = [None, None] + + audio, fs = None, None + score_perf_map = None + aw = None + keep_zoom = False + + fig, axs = plt.subplots(len(PERF_CODEC.parameter_names), + sharex=True, + gridspec_kw={'hspace': 0.15}) + plt.subplots_adjust(left=0.07, right=0.99, top=.99, bottom=0.1) + + def update_current_perf(info, item): + nonlocal current_performance + if item == 'piece': + current_performance[0] = info['new'] + else: + current_performance[1] = info['new'] + set_performance(*current_performance) + + def set_performance(piece, performer): + nonlocal audio, fs, score_perf_map, aw + + audio, fs = load_performance_audio(piece, performer) + score_perf_map = show_performance(piece, performer, fig, axs, keep_zoom) + + if keep_zoom: + s, e = axs[0].get_xlim() + start = max(0, int(score_perf_map(s)*fs)) + end = min(len(audio), int(score_perf_map(e)*fs)) + excerpt = audio[start:end] + else: + excerpt = audio + if aw is None: + aw = display(Audio(data=excerpt, rate=fs, autoplay=True), display_id=True) + else: + aw.update(Audio(data=excerpt, rate=fs, autoplay=True)) + + def set_keep_zoom(v): + nonlocal keep_zoom + keep_zoom = v['new'] + + def do_reset_zoom(v): + nonlocal axs, fig + for ax in axs: + ax.autoscale() + ax.autoscale_view() + fig.canvas.draw() + + piece_dd.observe(partial(update_current_perf, item='piece'), names=['value']) + performer_dd.observe(partial(update_current_perf, item='performer'), names=['value']) + keep_lims_chbox.observe(set_keep_zoom, names=['value']) + reset_lims.on_click(do_reset_zoom) + + display(widgets.HBox([piece_dd, performer_dd, keep_lims_chbox, reset_lims])) + display(status) + + set_performance(*current_performance) + + cursor = [] + play_range = [None, None] + thread_stop = None + + def on_mouse_down(event): + nonlocal play_range, thread_stop + if thread_stop: + thread_stop.set() + play_range[0] = event.xdata + + def on_mouse_up(event): + nonlocal play_range, cursor, thread_stop + play_range[1] = event.xdata + play_range.sort() + + while cursor: + cursor.pop().remove() + + for ax in axs: + cursor.append(ax.fill_betweenx(ax.get_ylim(), play_range[0], play_range[1], alpha=.2, color='gray')) + + fig.canvas.draw() + + start = max(0, 
int(score_perf_map(play_range[0])*fs))
+        end = min(len(audio), int(score_perf_map(play_range[1])*fs))
+        aw.display(Audio(data=audio[start:end], rate=fs, autoplay=True))
+
+        # duration = play_range[1] - play_range[0]
+        # thread_stop = threading.Event()
+        # thread = threading.Thread(
+        #     target=time_cursor_thread,
+        #     args=(fig, axs[0], play_range[0], play_range[1], duration, thread_stop))
+        # thread.start()
+
+    cid1 = fig.canvas.mpl_connect('button_press_event', on_mouse_down)
+    cid2 = fig.canvas.mpl_connect('button_release_event', on_mouse_up)
+
+
+def time_cursor_thread(fig, ax, start, end, duration, ev, rate=1):
+
+    color = 'black'
+    x = start
+    vline = ax.axvline(x, c=color)
+    delta_x = (end-start)/(duration*rate)
+    delta_t = 1/rate
+
+    while not ev.is_set() and x < end:
+        fig.canvas.draw()
+        vline.set(xdata=np.array([x, x]))
+        # fig.canvas.blit(ax.bbox)  # doesn't reliably update
+        x += delta_x
+        time.sleep(delta_t)
+
+    vline.remove()
+    fig.canvas.draw()
+
+
+def to_matched_score(note_pairs, beat_map):
+    ms = []
+    for sn, n in note_pairs:
+        sn_on, sn_off = beat_map([sn.start.t, sn.end.t])
+        sn_dur = sn_off - sn_on
+        n_dur = n['sound_off'] - n['note_on']
+        ms.append((sn_on, sn_dur, sn.midi_pitch, n['note_on'], n_dur, n['velocity']))
+    fields = [('onset', 'f4'), ('duration', 'f4'), ('pitch', 'i4'),
+              ('p_onset', 'f4'), ('p_duration', 'f4'), ('velocity', 'i4')]
+    return np.array(ms, dtype=fields)
diff --git a/basismixer/parse_tsv_alignment.py b/basismixer/parse_tsv_alignment.py
new file mode 100644
index 0000000..3b4d200
--- /dev/null
+++ b/basismixer/parse_tsv_alignment.py
@@ -0,0 +1,195 @@
+from partitura.utils import ensure_notearray
+import numpy as np
+import os
+
+
+def alignment_dicts_to_array(alignment):
+    """
+    Create a structured array from a list of note alignment dictionaries.
+
+    Parameters
+    ----------
+    alignment : list
+        A list of note alignment dictionaries.
+
+    Returns
+    -------
+    alignarray : structured ndarray
+        Structured array containing the note alignment.
+    """
+    fields = [('idx', 'i4'),
+              ('matchtype', 'U256'),
+              ('partid', 'U256'),
+              ('ppartid', 'U256')]
+
+    array = []
+    # for each dict create an appropriate entry in the array:
+    # match = 0, deletion = 1, insertion = 2
+    for no, i in enumerate(alignment):
+        if i["label"] == "match":
+            array.append((no, "0", i["score_id"], str(i["performance_id"])))
+        elif i["label"] == "insertion":
+            array.append((no, "2", "undefined", str(i["performance_id"])))
+        elif i["label"] == "deletion":
+            array.append((no, "1", i["score_id"], "undefined"))
+    alignarray = np.array(array, dtype=fields)
+
+    return alignarray
+
+
+def save_csv_for_parangonada(outdir, part, ppart, align,
+                             zalign=None, feature=None):
+    """
+    Save an alignment for visualization with Parangonada.
+
+    Parameters
+    ----------
+    outdir : str
+        A directory to save the files into.
+    part : Part, structured ndarray
+        A score part or its note_array.
+    ppart : PerformedPart, structured ndarray
+        A PerformedPart or its note_array.
+    align : list
+        A list of note alignment dictionaries.
+    zalign : list, optional
+        A second list of note alignment dictionaries.
+    feature : list, optional
+        A list of expressive feature dictionaries.
+
+    """
+
+    part = ensure_notearray(part)
+    ppart = ensure_notearray(ppart)
+
+    ffields = [('velocity', '<f4'),
+               ('timing', '<f4'),
+               ('articulation', '<f4'),
+               ('id', 'U256')]
+
+    farray = []
+    if feature is not None:
+        # velocity, timing, articulation, note id
+        for no, i in enumerate(list(feature['id'])):
+            farray.append((feature['velocity'][no], feature['timing'][no],
+                           feature['articulation'][no], i))
+    else:
+        for no, i in enumerate(list(part['id'])):
+            farray.append((0, 0, 0, i))
+
+    featurearray = np.array(farray, dtype=ffields)
+    alignarray = alignment_dicts_to_array(align)
+    if zalign is not None:
+        zarray = alignment_dicts_to_array(zalign)
+    else:
+        # if no zalign is given, save the alignment twice
+        zarray = alignment_dicts_to_array(align)
+
+    for name, array in (('ppart', ppart), ('part', part),
+                        ('align', alignarray), ('zalign', zarray),
+                        ('feature', featurearray)):
+        np.savetxt(os.path.join(outdir, name + '.csv'), array,
+                   fmt='%.20s', delimiter=',', comments='',
+                   header=','.join(array.dtype.names))
+
+
+def load_alignment_from_ASAP(file):
+    """
+    Load a note alignment of the ASAP dataset.
+
+    Parameters
+    ----------
+    file : str
+        A path to an ASAP alignment tsv file.
+
+    Returns
+    -------
+    alignlist : list
+        A list of note alignment dictionaries.
+    """
+    alignlist = list()
+    with open(file, 'r') as f:
+        for line in f.readlines():
+            fields = line.split('\t')
+            if fields[0][0] == "n" and fields[1][0] == "n":
+                field0 = fields[0]  # .split("-")[0] if len > 1, all quirk?
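+                # tsv rows are tab-separated pairs, mapped to alignment dicts
+                # (format inferred from the cases handled in this function):
+                #   "n123<TAB>n456"      -> match     (score id, performance id)
+                #   "insertion<TAB>n456" -> insertion (performed note only)
+                #   "n123<TAB>deletion"  -> deletion  (score note only)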
+ alignlist.append({"label": "match", "score_id": field0, "performance_id": fields[1]}) + elif fields[0] == "insertion": + alignlist.append({"label": "insertion", "performance_id": fields[1]}) + elif fields[0][0] == "n" and fields[1].startswith("deletion"): + field0 = fields[0]#.split("-")[0] + alignlist.append({"label": "deletion", "score_id": field0}) + else: + raise Exception(f"Unknown alignment type: {fields[0]}") + + return alignlist \ No newline at end of file diff --git a/basismixer/predictive_models/architectures.py b/basismixer/predictive_models/architectures.py index 65e0ed1..175ce49 100644 --- a/basismixer/predictive_models/architectures.py +++ b/basismixer/predictive_models/architectures.py @@ -81,6 +81,7 @@ def __init__(self, input_size, output_size, recurrent_size, hidden_size, n_layers=1, dropout=0.0, + recurrent_unit='GRU', dense_nl=nn.ReLU(), bidirectional=True, batch_first=True, @@ -102,7 +103,15 @@ def __init__(self, self.n_layers = n_layers self.batch_first = batch_first self.bidirectional = bidirectional - self.rnn = nn.GRU(input_size, self.recurrent_size, + self.recurrent_unit = recurrent_unit + if recurrent_unit == 'GRU': + recurrent_unit = nn.GRU + elif recurrent_unit == 'LSTM': + recurrent_unit = nn.LSTM + else: + raise Exception(recurrent_unit + "is not supported as recurrent unit") + + self.rnn = recurrent_unit(input_size, self.recurrent_size, self.n_layers, batch_first=batch_first, dropout=dropout, bidirectional=self.bidirectional) @@ -117,19 +126,22 @@ def __init__(self, if self.output_names is None: self.output_names = [str(i) for i in range(self.output_size)] - def init_hidden(self, batch_size): + def init_hidden(self, x): if self.bidirectional: n_layers = 2 * self.n_layers else: n_layers = self.n_layers - return torch.zeros(n_layers, batch_size, self.recurrent_size) + if self.recurrent_unit == 'LSTM': + return (torch.zeros(n_layers, x.size(0), self.recurrent_size).type(x.type()), + torch.zeros(n_layers, x.size(0), self.recurrent_size).type(x.type())) + return torch.zeros(n_layers, x.size(0), self.recurrent_size).type(x.type()) @standardize def forward(self, x): batch_size = x.size(0) seq_len = x.size(1) - h0 = self.init_hidden(batch_size).type(x.type()) - # tensor of shape (batch_size, seq_len, hidden_size*2) if bidirectional + h0 = self.init_hidden(x) + # tensor of shape (batch_size, seq_len, hidden_size*2) if bidirectional, tuple of 2 tensors if LSTM output, h = self.rnn(x, h0) flatten_shape = (self.recurrent_size * 2 if self.bidirectional else self.recurrent_size) diff --git a/basismixer/predictive_models/base.py b/basismixer/predictive_models/base.py index e486631..f2cc9db 100644 --- a/basismixer/predictive_models/base.py +++ b/basismixer/predictive_models/base.py @@ -274,3 +274,11 @@ def dtype(self): def dtype(self, dtype): self._dtype = dtype self.type(dtype) + + def to(self, *args, **kwargs): + result = super().to(*args, **kwargs) + try: + self.device = next(result.parameters()).device + except StopIteration: + pass# needn't update device if we have no params + return result diff --git a/basismixer/predictive_models/train.py b/basismixer/predictive_models/train.py index f16321f..0863b86 100644 --- a/basismixer/predictive_models/train.py +++ b/basismixer/predictive_models/train.py @@ -8,7 +8,7 @@ from torch.utils.data import Dataset, Sampler import torch.nn.functional as functional from tqdm import tqdm - +import sys LOGGER = logging.getLogger(__name__) @@ -170,19 +170,20 @@ def compute_data_stats(self): # self.model.out_std = out_std - def 
train(self):
+    def train(self, fold=None):
         self.compute_data_stats()
 
         train_loss_name = getattr(self.train_loss, 'name', 'Train Loss')
-        train_fn = os.path.join(self.out_dir, 'train_loss.txt')
+
+        train_fn = os.path.join(self.out_dir, f'train_loss{fold}.txt')
 
         # Initialize TrainProgressMonitors
         train_losses = TrainProgressMonitor(train_loss_name, fn=train_fn)
 
         valid_loss_name = None
         valid_losses = None
         if self.valid_dataloader is not None:
-            valid_fn = os.path.join(self.out_dir, 'valid_loss.txt')
+            valid_fn = os.path.join(self.out_dir, f'valid_loss{fold}.txt')
             if isinstance(self.valid_loss, (list, tuple)):
                 valid_loss_name = [getattr(crit, 'name', 'Valid Loss {0}'.format(i))
                                    for i, crit in enumerate(self.valid_loss)]
@@ -194,21 +195,26 @@ def train(self):
 
         validations_wo_improvement = 0
 
+        # validate once before training (only possible with a validation set)
+        if self.valid_dataloader is not None:
+            vl, r2 = self.valid_step(0)
+            valid_losses.update(0, vl, r2)
+            LOGGER.info('valid loss before training:' + valid_losses.last_loss + ' r2:' + str(r2))
+        else:
+            r2 = None
+
         # save before training
         self.save_checkpoint(-1, False, True)
         try:
             for epoch in range(self.start_epoch, self.epochs):
                 tl = self.train_step(epoch)
-                train_losses.update(epoch, tl)
+                train_losses.update(epoch, tl, r2)
 
                 do_checkpoint = np.mod(epoch + 1, self.save_freq) == 0
 
                 if do_checkpoint:
                     if self.valid_dataloader is not None:
-                        vl = self.valid_step(epoch)
-                        valid_losses.update(epoch, vl)
-                        LOGGER.info(train_losses.last_loss + '\t' + valid_losses.last_loss)
+                        vl, r2 = self.valid_step(epoch)
+                        valid_losses.update(epoch, vl, r2)
+                        LOGGER.info('t_loss:' + train_losses.last_loss + '\t v_loss:' + valid_losses.last_loss +
+                                    '\t r2:' + str(r2))
                     else:
                         vl = [tl]
                         LOGGER.info(train_losses.last_loss)
@@ -298,6 +304,7 @@ def __init__(self, name='', fn='/tmp/train_progres.txt',
         self.name = name
         self.losses = []
         self.epochs = []
+        self.correlations = []
         self.fn = fn
         self.show_fmt = show_fmt
         self.write_fmt = write_fmt
@@ -316,13 +323,13 @@ def __init__(self, name='', fn='/tmp/train_progres.txt',
 
             f.write(header)
 
-    def update(self, epoch, loss):
+    def update(self, epoch, loss, correlations=None):
         """
         Append new loss(es) and update the log file
         """
         self.losses.append(loss)
         self.epochs.append(epoch)
+        self.correlations.append(correlations)
 
         self.update_log()
 
@@ -348,6 +355,13 @@ def update_log(self):
         else:
             out_str = self.write_fmt.format(float(self.losses[-1]))
 
+        if self.correlations[-1] is not None:
+            out_str += '\t r2:'
+            if isinstance(self.correlations[-1], (list, tuple, np.ndarray)):
+                out_str += '\t'.join([self.write_fmt.format(l) for l in self.correlations[-1]])
+            else:
+                out_str += self.write_fmt.format(float(self.correlations[-1]))
+
         with open(self.fn, 'a') as f:
             f.write('{0}\t{1}\n'.format(self.epochs[-1], out_str))
 
@@ -364,7 +378,8 @@ def __init__(self, model, train_loss, optimizer,
                  save_freq=10,
                  early_stopping=100,
                  out_dir='.',
-                 resume_from_saved_model=None):
+                 resume_from_saved_model=None,
+                 **rest):
         super().__init__(model=model,
                          train_loss=train_loss,
                          optimizer=optimizer,
@@ -409,6 +424,7 @@ def train_step(self, epoch, *args, **kwargs):
 
             self.optimizer.zero_grad()
             loss.backward()
+            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
             self.optimizer.step()
 
             if self.lr_scheduler is not None:
@@ -416,10 +432,22 @@
         return np.mean(losses)
 
+    @staticmethod
+    def r2(preds, targets):
+        # Pearson correlation along the sequence axis (logged as "r2")
+        pm = torch.mean(preds, 1, keepdim=True)
+        tm = torch.mean(targets, 1, keepdim=True)
+        n = (preds - pm) * (targets - tm)
+        n = n.sum(1)
+
+        d = torch.sqrt(((preds - pm)**2).sum(1) * ((targets - tm)**2).sum(1))
+
+        return (n / d).mean(0)
+
+    def valid_step(self, *args, **kwargs):
         self.model.eval()
         losses = []
+        correlations = []
         with torch.no_grad():
             for input, target in self.valid_dataloader:
@@ -439,7 +467,9 @@
                 loss = [self.valid_loss(output, target)]
                 losses.append([l.item() for l in loss])
-        return np.mean(losses, axis=0)
+                correlations.append(self.r2(output, target).cpu().numpy())
+
+        return np.mean(losses, axis=0), np.ma.masked_invalid(correlations).mean(0)
 
 
 class MSELoss(nn.Module):
@@ -447,3 +477,23 @@ class MSELoss(nn.Module):
 
     def __call__(self, predictions, targets):
         return functional.mse_loss(predictions, targets)
+
+
+class MultiMSELoss(nn.Module):
+
+    WEIGHTS = {'velocity_dev': 1000, 'timing': 1000, 'articulation_log': 0.5, 'velocity_trend': 10, 'beat_period_standardized': 1,
+               'beat_period_mean': 1, 'beat_period_std': 1}
+
+    def __init__(self, targets):
+        super(MultiMSELoss, self).__init__()
+        self.task_num = len(targets)
+        self.log_vars = nn.Parameter(torch.zeros((self.task_num)))
+        self.weights = [self.WEIGHTS[t] for t in targets]
+
+    def forward(self, preds, targets):
+        # uncertainty-weighted multi-task MSE: exp(-log_var) scales each
+        # target's error, and the +log_var term keeps the variances bounded
+        loss = 0
+
+        for i, v in enumerate(self.log_vars):
+            loss += torch.exp(-v) * self.weights[i] * functional.mse_loss(preds[..., i], targets[..., i]) + v
+
+        return loss
\ No newline at end of file
diff --git a/basismixer/utils/__init__.py b/basismixer/utils/__init__.py
index e4e3321..d7bf78b 100644
--- a/basismixer/utils/__init__.py
+++ b/basismixer/utils/__init__.py
@@ -5,7 +5,9 @@
     save_pyc_bz,
     to_memmap,
     pair_files,
-    clip)
+    clip,
+    split_datasets_by_piece,
+    prepare_datasets_for_model)
 
 from .music import (
     get_unique_onset_idxs,
diff --git a/basismixer/utils/generic.py b/basismixer/utils/generic.py
index 7965c2a..b764a93 100644
--- a/basismixer/utils/generic.py
+++ b/basismixer/utils/generic.py
@@ -6,6 +6,7 @@
 from collections import defaultdict
 
 import numpy as np
+from torch.utils.data import ConcatDataset
 
 
 def load_pyc_bz(fn):
@@ -129,5 +130,57 @@ def clip(v, low=0, high=127):
     v[too_high] = high
 
 
+def split_datasets_by_piece(datasets, fold=0, folds=5, dataset_name='magaloff'):
+    from partitura.utils import partition
+    from pandas_ods_reader import read_ods
-
+    if dataset_name == 'asap':
+        ods = read_ods("../basismixer/assets/perfwise_insertions_deletions.ods")
+
+        relevant = ods.values[:, :2]
+        robust = [r[0].split('asap-dataset\\')[1] for r in relevant if r[1] in ['c']]  # , 'c + highs', 'c + ornaments'
+
+        robust_performances = []
+        for d in datasets:
+            for r in robust:
+                if d.perf_name in r and d.name in r:
+                    robust_performances.append(d)
+        datasets = robust_performances
+
+    by_piece = partition(lambda d: d.name, datasets)
+    pieces = list(by_piece.keys())
+
+    RNG = np.random.RandomState(1984)
+    RNG.shuffle(pieces)
+
+
+    test_size = 1 / folds
+    n_test = max(1, int(np.round(test_size*len(pieces))))
+    n_train = len(pieces) - n_test
+
+    if n_train < 1:
+        raise Exception('Not enough pieces to split datasets according '
+                        'to the specified test/validation proportions')
+
+    test_start = n_test * fold
+    test_end = n_test * (1 + fold)
+    test_pieces = pieces[test_start:test_end]
+    train_pieces = [p for p in pieces if p not in test_pieces]
+
+    test_set = [d for pd in [by_piece[p] for p in test_pieces] for d in pd]
+    train_set = [d for pd in [by_piece[p] for p in train_pieces] for d in pd]
+
+    return (ConcatDataset(train_set),
+            ConcatDataset(test_set))
+
+
+def prepare_datasets_for_model(datasets, model_config):
+    for bmds in datasets:
+        targets, output_names = [], []
+        for param in model_config['parameter_names']:
+            i =
bmds.output_names.index(param) + targets.append(bmds.targets[:, i]) + output_names.append(bmds.output_names[i]) + bmds.targets, bmds.output_names = np.stack(targets, 1), np.array(output_names) + + return datasets \ No newline at end of file diff --git a/bin/cross_validate_model.py b/bin/cross_validate_model.py new file mode 100644 index 0000000..1fc44bd --- /dev/null +++ b/bin/cross_validate_model.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python + +import argparse +import json +import logging +import os +from functools import partialmethod + +import numpy as np +import torch +from partitura import save_performance_midi, save_match +from torch.utils.data import DataLoader, ConcatDataset +from tqdm import tqdm + +from basismixer.helper.rendering import compute_basis_from_xml +from basismixer.performance_codec import get_performance_codec +from basismixer.predictive_models.train import MultiMSELoss, MSELoss + +logging.basicConfig(level=logging.INFO) + +from basismixer.predictive_models import (construct_model, + SupervisedTrainer, + FullPredictiveModel) +from basismixer.utils import load_pyc_bz, save_pyc_bz, split_datasets_by_piece, prepare_datasets_for_model, \ + post_process_predictions +from basismixer import make_datasets + +LOGGER = logging.getLogger(__name__) + +tqdm.__init__ = partialmethod(tqdm.__init__, disable=True) + +# def my_basis(part): +# W = np.array([n.midi_pitch for n in part.notes_tied]).astype(np.float) +# return W.reshape((-1, 1)), ['my'] + +basis_features = ['polynomial_pitch_feature', 'duration_feature', 'metrical_strength_feature'] + +CONFIG = [ + dict(onsetwise=False, + basis_functions=basis_features, + parameter_names=['velocity_dev', 'timing', 'articulation_log', 'velocity_trend', 'beat_period_standardized', + 'beat_period_mean', 'beat_period_std'],# 'velocity_dev','timing', 'articulation_log', 'velocity_trend', 'beat_period_standardized', 'beat_period_mean', 'beat_period_std'], #['velocity_dev', 'timing', 'articulation_log', 'velocity_trend', 'beat_period_standardized', 'beat_period_mean', 'beat_period_std'] + seq_len=50, + model=dict(constructor=['basismixer.predictive_models', 'RecurrentModel'], + args=dict(recurrent_size=128, + n_layers=1, + hidden_size=64)), + train_args=dict( + optimizer_params=['Adam', dict(lr=1e-4)], + epochs=20, + save_freq=1, + early_stopping=100, + batch_size=128, + ) + ) +] + +def jsonize_dict(input_dict): + out_dict = dict() + for k, v in input_dict.items(): + if isinstance(v, np.ndarray): + out_dict[k] = v.tolist() + elif isinstance(v, dict): + out_dict[k] = jsonize_dict(v) + else: + out_dict[k] = v + return out_dict + + +def render_fold_match(model, pieces, fold): + import warnings + predicter = FullPredictiveModel([model], in_names, out_names) + perf_codec = get_performance_codec(predicter.output_names) + for piece in pieces: + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + if args.dataset_name == 'magaloff': + xml_fn = args.dataset_root_folder + 'xml/' + piece + '.xml' + elif args.dataset_name == 'asap': + xml_fn = args.dataset_root_folder + f'{piece}/xml_score.musicxml' + else: + xml_fn = args.dataset_root_folder + 'musicxml/' + piece + '.musicxml' + basis, part = compute_basis_from_xml(xml_fn, model.input_names) + onsets = np.array([n.start.t for n in part.notes_tied]) + preds = predicter.predict(basis, onsets) + post_process_predictions(preds) + predicted_ppart = perf_codec.decode(part, preds) + out_folder = args.out_dir + f'/CV_fold_{fold}/' + os.makedirs(out_folder, exist_ok=True) + piece = 
piece.replace('/', '-') + save_performance_midi(predicted_ppart, out_folder + f'{piece}.mid') + alignment = [{'label': 'match', 'score_id': sn.id, 'performance_id': pn['id']} for sn, pn in zip(part.notes_tied, predicted_ppart.notes)] + save_match(alignment, predicted_ppart, part, out_folder + f'{piece}.match') + except Exception as e: + print(f"could not render {piece}") + print(e) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Train a Model given a dataset") + parser.add_argument("dataset_name", choices=["asap", "4x22", "magaloff"], help="Folder with MusicXML files") + parser.add_argument("dataset_root_folder", help="Root folder of the dataset") + parser.add_argument("--folds", help="number of folds in CV", default=10) + parser.add_argument("--cache", help=( + 'Path to pickled datasets file. If specified and the file exists, ' + 'and the cached data matches the model specs' #<---todo + 'the `dataset_root_folder` option will be ignored')) + parser.add_argument("--pieces", help="Text file with valid pieces", + default=None) + parser.add_argument("--model-config", help="Model configuration", + default=CONFIG) + parser.add_argument("--out-dir", help="Output directory", + default='/tmp') + parser.add_argument('--targets', default=[], nargs='+') + + args = parser.parse_args() + + if args.targets: + CONFIG[0]["parameter_names"] = args.targets + + folds = args.folds + + # Load model architecture + if not isinstance(args.model_config, list): + model_config = json.load(open(args.model_config)) + else: + model_config = args.model_config + + if not os.path.exists(args.out_dir): + os.mkdir(args.out_dir) + + json.dump(model_config, + open(os.path.join(args.out_dir, 'model_config.json'), 'w'), + indent=2) + + if args.pieces is not None: + print('valid_pieces') + args.pieces = np.loadtxt(args.pieces, dtype=str) + + rng = np.random.RandomState(1984) + + datasets = [] + models = [] + target_idxs = [] + + if args.cache and os.path.exists(args.cache): + LOGGER.info('Loading data from {}'.format(args.cache)) + datasets = list(load_pyc_bz(args.cache)) + if args.targets: + datasets[0] = (datasets[0][0], datasets[0][1], args.targets) + else: + datasets = make_datasets(model_config, + args.dataset_root_folder, + args.dataset_name) + if args.cache: + LOGGER.info('Saving data to {}'.format(args.cache)) + save_pyc_bz(datasets, args.cache) + + for (mdatasets, in_names, out_names), config in zip(datasets, model_config): + + mdatasets = prepare_datasets_for_model(mdatasets, config) + dataset = ConcatDataset(mdatasets) + batch_size = config['train_args'].pop('batch_size') + + for fold in range(folds): + #### Create train and validation data loaders ##### + train_set, test_set = split_datasets_by_piece(dataset.datasets, fold, folds, False) + train_loader, valid_loader = DataLoader(train_set, batch_size=batch_size), \ + DataLoader(test_set, batch_size=batch_size) + + #### Construct Models #### + + model_cfg = config['model'].copy() + model_cfg['args']['input_names'] = in_names + model_cfg['args']['input_size'] = len(in_names) + model_cfg['args']['output_names'] = config['parameter_names'] + model_cfg['args']['output_size'] = len(config['parameter_names']) + model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise' + model_name = ('-'.join(out_names) + + '-' + ('onsetwise' if config['onsetwise'] else 'notewise')) + model_out_dir = os.path.join(args.out_dir, model_name) + if not os.path.exists(model_out_dir): + os.mkdir(model_out_dir) + # save model config for 
later saving model + json.dump(jsonize_dict(model_cfg), + open(os.path.join(model_out_dir, 'config.json'), 'w'), + indent=2) + model = construct_model(model_cfg) + + loss = MultiMSELoss(config['parameter_names']) if len(config['parameter_names']) > 1 else MSELoss() + + ### Construct the optimizer #### + optim_name, optim_args = config['train_args']['optimizer_params'] + optim = getattr(torch.optim, optim_name) + optim = optim(model.parameters(), **optim_args) + + trainer = SupervisedTrainer(model=model, + train_loss=loss, + valid_loss=loss, + train_dataloader=train_loader, + valid_dataloader=valid_loader, + out_dir=model_out_dir, + optimizer=optim, + **config['train_args']) + + trainer.train(f'_{fold}') + + test_performance_names = set([t.name for t in test_set.datasets]) + test_pieces = [p for p in mdatasets if p.name in test_performance_names] + + render_fold_match(model, test_performance_names, fold) + diff --git a/bin/load_alignment_example b/bin/load_alignment_example.py old mode 100755 new mode 100644 similarity index 99% rename from bin/load_alignment_example rename to bin/load_alignment_example.py index b10a81d..5ec0574 --- a/bin/load_alignment_example +++ b/bin/load_alignment_example.py @@ -1,17 +1,14 @@ #!/usr/bin/env python import argparse -import json -import logging import matplotlib.pyplot as plt import numpy as np - import partitura from partitura.utils import partition -import basismixer.basisfunctions -from basismixer.utils import pair_files + import basismixer.performance_codec as pc +from basismixer.utils import pair_files def main(): diff --git a/bin/load_data_example b/bin/load_data_example.py old mode 100755 new mode 100644 similarity index 99% rename from bin/load_data_example rename to bin/load_data_example.py index d4e5820..0f79bc3 --- a/bin/load_data_example +++ b/bin/load_data_example.py @@ -25,7 +25,7 @@ from basismixer.utils import save_pyc_bz from basismixer.utils import pair_files - +import sys LOGGER = logging.getLogger(__name__) diff --git a/bin/make_basis_example b/bin/make_basis_example.py old mode 100755 new mode 100644 similarity index 92% rename from bin/make_basis_example rename to bin/make_basis_example.py index 608ae82..a277ca5 --- a/bin/make_basis_example +++ b/bin/make_basis_example.py @@ -1,23 +1,18 @@ #!/usr/bin/env python import argparse -import json -import logging -import numpy as np import matplotlib.pyplot as plt -import torch - +import numpy as np import partitura +import partitura.musicanalysis as ma +import partitura.score -import basismixer -from basismixer.utils import to_memmap -import basismixer.basisfunctions as bf def main(): parser = argparse.ArgumentParser(description="Create basis functions for a MusicXML file") parser.add_argument("musicxml", help="MusicXML file") - parser.add_argument("--basis", type=str, nargs='+', help='names of one or more basis functions') + parser.add_argument("--basis", type=str, nargs='+', help='names of one or more basis features') # parser.add_argument("--cachefolder", type=str, help='Cache folder') # parser.add_argument("--basisconfig", type=str, # help=("JSON file specifying a set of basis functions for each expressive target. 
" @@ -28,8 +23,10 @@ def main(): # basis_names = list(set(i for ii in basis_config.values() for i in ii)) part = partitura.load_musicxml(args.musicxml) + part = partitura.score.merge_parts(part) + part = partitura.score.unfold_part_maximal(part, update_ids=False) print(part.pretty()) - basis, names = bf.make_basis(part, args.basis) + basis, names = ma.make_note_feats(part, args.basis) # plot onsets = None # np.array([n.start.t for n in part.notes_tied]) plot_basis(basis, names, '/tmp/out.png', onsets, title=part.part_name) diff --git a/bin/render_performance b/bin/render_performance.py old mode 100755 new mode 100644 similarity index 97% rename from bin/render_performance rename to bin/render_performance.py index 7787b07..69f0d34 --- a/bin/render_performance +++ b/bin/render_performance.py @@ -24,7 +24,7 @@ remove_grace_notes) from basismixer import TOY_MODEL_CONFIG -from basismixer.basisfunctions import make_basis +from partitura.musicanalysis import make_note_feats from basismixer.performance_codec import get_performance_codec from basismixer.predictive_models import (FullPredictiveModel, construct_model) @@ -37,7 +37,9 @@ RENDER_CONFIG) logging.basicConfig(level=logging.INFO) +import sys LOGGER = logging.getLogger(__name__) +LOGGER.addHandler(logging.StreamHandler(sys.stdout)) def load_model(model_config, default_values=DEFAULT_VALUES): @@ -134,7 +136,7 @@ def compute_basis_from_score(score_fn, input_names): # part = unfold_part_maximal(part) # Compute basis functions - _basis, bf_names = make_basis(part, list(set([bf.split('.')[0] for bf in input_names]))) + _basis, bf_names = make_note_feats(part, list(set([bf.split('.')[0] for bf in input_names]))) basis = np.zeros((len(_basis), len(input_names))) for i, n in enumerate(input_names): try: @@ -146,7 +148,6 @@ def compute_basis_from_score(score_fn, input_names): return basis, part - def predict(model_config, score_fn, default_values=DEFAULT_VALUES): """ Main method for predicting a performance. 
diff --git a/bin/train_model_example b/bin/train_model_example deleted file mode 100755 index 2b5eac6..0000000 --- a/bin/train_model_example +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python - -import argparse -import json -import logging -import os - -import numpy as np -import torch - -from torch.utils.data.sampler import SubsetRandomSampler -from torch.utils.data import DataLoader, ConcatDataset - -logging.basicConfig(level=logging.INFO) - -from basismixer.predictive_models import (construct_model, - SupervisedTrainer, - MSELoss) -from basismixer.utils import load_pyc_bz, save_pyc_bz -from basismixer import make_datasets - -LOGGER = logging.getLogger(__name__) - -# def my_basis(part): -# W = np.array([n.midi_pitch for n in part.notes_tied]).astype(np.float) -# return W.reshape((-1, 1)), ['my'] - -CONFIG = [ - dict(onsetwise=False, - basis_functions=['polynomial_pitch_basis', - 'loudness_direction_basis', - 'tempo_direction_basis', - 'articulation_basis', - 'duration_basis', - # my_basis, - 'grace_basis', - 'slur_basis', - 'fermata_basis', - # 'metrical_basis' - 'metrical_strength_basis', - 'time_signature_basis', - 'relative_score_position_basis' - ], - parameter_names=['velocity_dev', 'timing', 'articulation_log'], - seq_len=1, - model=dict(constructor=['basismixer.predictive_models', 'FeedForwardModel'], - args=dict(hidden_size=128)), - train_args=dict( - optimizer=['Adam', dict(lr=1e-4)], - epochs=10, - save_freq=10, - early_stopping=100, - batch_size=1000, - ) - ), - dict(onsetwise=True, - basis_functions=['polynomial_pitch_basis', - 'loudness_direction_basis', - 'tempo_direction_basis', - 'articulation_basis', - 'duration_basis', - 'slur_basis', - 'grace_basis', - 'fermata_basis', - # 'metrical_basis' - 'metrical_strength_basis', - 'time_signature_basis', - 'relative_score_position_basis' - ], - parameter_names=['velocity_trend', 'beat_period_standardized', - 'beat_period_mean', 'beat_period_std'], - seq_len=100, - model=dict(constructor=['basismixer.predictive_models', 'RecurrentModel'], - args=dict(recurrent_size=128, - n_layers=1, - hidden_size=64)), - train_args=dict( - optimizer=['Adam', dict(lr=1e-4)], - epochs=10, - save_freq=5, - early_stopping=100, - batch_size=50, - ) - ) -] - -def jsonize_dict(input_dict): - out_dict = dict() - for k, v in input_dict.items(): - if isinstance(v, np.ndarray): - out_dict[k] = v.tolist() - elif isinstance(v, dict): - out_dict[k] = jsonize_dict(v) - else: - out_dict[k] = v - return out_dict - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="Train a Model given a dataset") - parser.add_argument("xmlfolder", help="Folder with MusicXML files") - parser.add_argument("matchfolder", help="Folder with match files") - parser.add_argument("--datasets", help=( - 'Path to pickled datasets file. If specified and the file exists, ' - 'the `xmlfolder` and `matchfolder` options will be ignored, and it ' - 'will be assumed that datasets in the specified file correspond to ' - 'the model configuration. 
If specifed and the path does not exist, ' - 'the datasets are computed and saved to the specified path.')) - parser.add_argument("--quirks", action='store_true', - help="Use this option when training on magaloff/zeilinger") - parser.add_argument("--pieces", help="Text file with valid pieces", - default=None) - parser.add_argument("--model-config", help="Model configuration", - default=CONFIG) - parser.add_argument("--out-dir", help="Output directory", - default='/tmp') - args = parser.parse_args() - - # Load model architecture - if not isinstance(args.model_config, list): - model_config = json.load(open(args.model_config)) - else: - model_config = args.model_config - - if not os.path.exists(args.out_dir): - os.mkdir(args.out_dir) - - json.dump(model_config, - open(os.path.join(args.out_dir, 'model_config.json'), 'w'), - indent=2) - - if args.pieces is not None: - print('valid_pieces') - args.pieces = np.loadtxt(args.pieces, dtype=str) - - rng = np.random.RandomState(1984) - - datasets = [] - models = [] - target_idxs = [] - input_idxs = [] - valid_size = 0.20 - - if args.datasets and os.path.exists(args.datasets): - LOGGER.info('Loading data from {}'.format(args.datasets)) - datasets = load_pyc_bz(args.datasets) - else: - datasets = make_datasets(model_config, - args.xmlfolder, - args.matchfolder, - pieces=args.pieces, - quirks=args.quirks) - if args.datasets: - LOGGER.info('Saving data to {}'.format(args.datasets)) - save_pyc_bz(datasets, args.datasets) - - for (mdatasets, in_names, out_names), config in zip(datasets, model_config): - dataset = ConcatDataset(mdatasets) - batch_size = config['train_args'].pop('batch_size') - - #### Create train and validation data loaders ##### - dataset_idx = np.arange(len(dataset)) - rng.shuffle(dataset_idx) - len_valid = int(np.round(len(dataset) * valid_size)) - valid_idx = dataset_idx[0:len_valid] - train_idx = dataset_idx[len_valid:] - - train_sampler = SubsetRandomSampler(train_idx) - valid_sampler = SubsetRandomSampler(valid_idx) - train_loader = DataLoader(dataset, - batch_size=batch_size, - sampler=train_sampler) - valid_loader = DataLoader(dataset, - batch_size=batch_size, - sampler=valid_sampler) - - #### Construct Models #### - - model_cfg = config['model'].copy() - model_cfg['args']['input_names'] = in_names - model_cfg['args']['input_size'] = len(in_names) - model_cfg['args']['output_names'] = out_names - model_cfg['args']['output_size'] = len(out_names) - model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise' - model_name = ('-'.join(out_names) + - '-' + ('onsetwise' if config['onsetwise'] else 'notewise')) - model_out_dir = os.path.join(args.out_dir, model_name) - if not os.path.exists(model_out_dir): - os.mkdir(model_out_dir) - # save model config for later saving model - json.dump(jsonize_dict(model_cfg), - open(os.path.join(model_out_dir, 'config.json'), 'w'), - indent=2) - model = construct_model(model_cfg) - - loss = MSELoss() - - ### Construct the optimizer #### - optim_name, optim_args = config['train_args']['optimizer'] - optim = getattr(torch.optim, optim_name) - config['train_args']['optimizer'] = optim(model.parameters(), **optim_args) - - trainer = SupervisedTrainer(model=model, - train_loss=loss, - valid_loss=loss, - train_dataloader=train_loader, - valid_dataloader=valid_loader, - out_dir=model_out_dir, - **config['train_args']) - - trainer.train() - diff --git a/bin/train_model_example.py b/bin/train_model_example.py new file mode 100644 index 0000000..2c5eaf9 --- /dev/null +++ 
b/bin/train_model_example.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python + +import argparse +import json +import logging +import os + +import numpy as np +import torch +from torch.utils.data import DataLoader, ConcatDataset + +from basismixer.predictive_models.train import MultiMSELoss + +logging.basicConfig(level=logging.INFO) + +from basismixer.predictive_models import (construct_model, + SupervisedTrainer) +from basismixer.utils import load_pyc_bz, save_pyc_bz, split_datasets_by_piece, prepare_datasets_for_model +from basismixer import make_datasets + +LOGGER = logging.getLogger(__name__) + +# def my_basis(part): +# W = np.array([n.midi_pitch for n in part.notes_tied]).astype(np.float) +# return W.reshape((-1, 1)), ['my'] + +basis_features = ['polynomial_pitch_feature', 'duration_feature', 'metrical_strength_feature'] + +CONFIG = [ + dict(onsetwise=False, + basis_functions=basis_features, + parameter_names=['velocity_dev','timing', 'articulation_log', 'velocity_trend'],# 'velocity_dev','timing', 'articulation_log', 'velocity_trend', 'beat_period_standardized', 'beat_period_mean', 'beat_period_std'], #['velocity_dev', 'timing', 'articulation_log', 'velocity_trend', 'beat_period_standardized', 'beat_period_mean', 'beat_period_std'] + seq_len=50, + model=dict(constructor=['basismixer.predictive_models', 'RecurrentModel'], + args=dict(recurrent_size=128, + n_layers=1, + hidden_size=64)), + train_args=dict( + optimizer=['Adam', dict(lr=1e-4)], + epochs=10, + save_freq=1, + early_stopping=100, + batch_size=50, + ) + ) +] + +def jsonize_dict(input_dict): + out_dict = dict() + for k, v in input_dict.items(): + if isinstance(v, np.ndarray): + out_dict[k] = v.tolist() + elif isinstance(v, dict): + out_dict[k] = jsonize_dict(v) + else: + out_dict[k] = v + return out_dict + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Train a Model given a dataset") + parser.add_argument("dataset_name", choices=["asap", "4x22", "magaloff"], help="Folder with MusicXML files") + parser.add_argument("dataset_root_folder", help="Root folder of the dataset") + parser.add_argument("--cache", help=( + 'Path to pickled datasets file. 
If specified and the file exists, ' + 'and the cached data matches the model specs' #<---todo + 'the `dataset_root_folder` option will be ignored')) + parser.add_argument("--pieces", help="Text file with valid pieces", + default=None) + parser.add_argument("--model-config", help="Model configuration", + default=CONFIG) + parser.add_argument("--out-dir", help="Output directory", + default='/tmp') + args = parser.parse_args() + + # Load model architecture + if not isinstance(args.model_config, list): + model_config = json.load(open(args.model_config)) + else: + model_config = args.model_config + + if not os.path.exists(args.out_dir): + os.mkdir(args.out_dir) + + json.dump(model_config, + open(os.path.join(args.out_dir, 'model_config.json'), 'w'), + indent=2) + + if args.pieces is not None: + print('valid_pieces') + args.pieces = np.loadtxt(args.pieces, dtype=str) + + rng = np.random.RandomState(1984) + + datasets = [] + models = [] + target_idxs = [] + + if args.cache and os.path.exists(args.cache): + LOGGER.info('Loading data from {}'.format(args.cache)) + datasets = load_pyc_bz(args.cache) + else: + datasets = make_datasets(model_config, + args.dataset_root_folder, + args.dataset_name) + if args.cache: + LOGGER.info('Saving data to {}'.format(args.cache)) + save_pyc_bz(datasets, args.cache) + + for (mdatasets, in_names, out_names), config in zip(datasets, model_config): + mdatasets = prepare_datasets_for_model(mdatasets, config) + dataset = ConcatDataset(mdatasets) + batch_size = config['train_args'].pop('batch_size') + + #### Create train and validation data loaders ##### + train_set, test_set = split_datasets_by_piece(dataset.datasets, 0, 5, False) + train_loader, valid_loader = DataLoader(train_set, batch_size=batch_size), \ + DataLoader(test_set, batch_size=batch_size) + + #### Construct Models #### + + model_cfg = config['model'].copy() + model_cfg['args']['input_names'] = in_names + model_cfg['args']['input_size'] = len(in_names) + model_cfg['args']['output_names'] = config['parameter_names'] + model_cfg['args']['output_size'] = len(config['parameter_names']) + model_cfg['args']['input_type'] = 'onsetwise' if config['onsetwise'] else 'notewise' + model_name = ('-'.join(out_names) + + '-' + ('onsetwise' if config['onsetwise'] else 'notewise')) + model_out_dir = os.path.join(args.out_dir, model_name) + if not os.path.exists(model_out_dir): + os.mkdir(model_out_dir) + # save model config for later saving model + json.dump(jsonize_dict(model_cfg), + open(os.path.join(model_out_dir, 'config.json'), 'w'), + indent=2) + model = construct_model(model_cfg) + + loss = MultiMSELoss(config['parameter_names'])#MSELoss() + + ### Construct the optimizer #### + optim_name, optim_args = config['train_args']['optimizer'] + optim = getattr(torch.optim, optim_name) + config['train_args']['optimizer'] = optim(list(model.parameters()) + list(loss.parameters()), **optim_args) + + trainer = SupervisedTrainer(model=model, + train_loss=loss, + valid_loss=loss, + train_dataloader=train_loader, + valid_dataloader=valid_loader, + out_dir=model_out_dir, + **config['train_args']) + + trainer.train() + diff --git a/environment.yml b/environment.yml index 39dbd60..090f28d 100644 --- a/environment.yml +++ b/environment.yml @@ -18,5 +18,5 @@ dependencies: - pip: - appdirs - soundfile - - git+https://github.com/OFAI/partitura.git@develop - - git+https://github.com/OFAI/basismixer.git@develop + - git+https://github.com/OFAI/partitura.git@main + - git+https://github.com/OFAI/basismixer.git@asap diff --git 
a/requirements.txt b/requirements.txt index 7068e4e..a50eb87 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ scipy>=1.2 numpy>=1.16 torch>=1.3 tqdm>=4.43 -git+https://github.com/OFAI/partitura.git@develop +git+https://github.com/OFAI/partitura.git@main diff --git a/setup.py b/setup.py index ef8634d..29b2d2b 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ EMAIL = '' AUTHOR = 'Carlos Cancino-Chacón, Maarten Grachten' REQUIRES_PYTHON = '>=3.5' -VERSION = '0.1.0' +VERSION = '0.1.1' # What packages are required for this module to be executed? REQUIRED = [
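For reference, a minimal sketch of the uncertainty-weighted multi-task loss idea behind `MultiMSELoss` (simplified: the fixed per-target `WEIGHTS` are omitted here). It also shows why `train_model_example.py` above passes `list(model.parameters()) + list(loss.parameters())` to the optimizer: the per-target log-variances are themselves learnable parameters.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class UncertaintyMSE(nn.Module):
    """Simplified sketch of MultiMSELoss: one learned log-variance per target."""
    def __init__(self, n_tasks):
        super().__init__()
        self.log_vars = nn.Parameter(torch.zeros(n_tasks))

    def forward(self, preds, targets):
        loss = 0.0
        for i, v in enumerate(self.log_vars):
            # exp(-v) down-weights noisy targets; the +v term penalizes
            # inflating the variance to escape the MSE
            loss = loss + torch.exp(-v) * F.mse_loss(preds[..., i], targets[..., i]) + v
        return loss

model = nn.Linear(4, 3)
loss_fn = UncertaintyMSE(n_tasks=3)
# the log-variances are trainable, so they must be handed to the optimizer
# together with the model parameters, as in train_model_example.py above
optim = torch.optim.Adam(list(model.parameters()) + list(loss_fn.parameters()), lr=1e-4)

x, y = torch.randn(8, 4), torch.randn(8, 3)
optim.zero_grad()
loss = loss_fn(model(x), y)
loss.backward()
optim.step()
```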