Skip to content

Commit 49abae3

Browse files
vine: fixed_location task groups (#3787)
* merge vine_task.c * unused code * add worker code * rebase * more merging * regular task issue fix * cleanup allocations * compatible with function calls * remove group reference on cancel * add tune option for task groups * format * get groupid from file recovery task * comment * merge in priority queue * worker cache ensure check queued tasks * commit task after group list removal * do not group recovery tasks * format * format after rebase * add group counter to manager * do not check refcount * change group hash table to itable * change type at worker
1 parent c682ccb commit 49abae3

File tree

12 files changed

+243
-15
lines changed

12 files changed

+243
-15
lines changed

taskvine/src/manager/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ SOURCES = \
2727
vine_current_transfers.c \
2828
vine_file_replica_table.c \
2929
vine_fair.c \
30-
vine_runtime_dir.c
30+
vine_runtime_dir.c \
31+
vine_task_groups.c
3132

3233
PUBLIC_HEADERS = taskvine.h
3334

taskvine/src/manager/taskvine.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,11 @@ typedef enum {
7373
/** Select overall scheduling algorithm for matching tasks to workers. */
7474
typedef enum {
7575
VINE_SCHEDULE_UNSET = 0, /**< Internal use only. */
76-
VINE_SCHEDULE_FCFS, /**< Select worker on a first-come-first-serve basis. */
77-
VINE_SCHEDULE_FILES, /**< Select worker that has the most data required by the task. (default) */
78-
VINE_SCHEDULE_TIME, /**< Select worker that has the fastest execution time on previous tasks. */
79-
VINE_SCHEDULE_RAND, /**< Select a random worker. */
80-
VINE_SCHEDULE_WORST /**< Select the worst fit worker (the worker with more unused resources). */
76+
VINE_SCHEDULE_FCFS, /**< Select worker on a first-come-first-serve basis. */
77+
VINE_SCHEDULE_FILES, /**< Select worker that has the most data required by the task. (default) */
78+
VINE_SCHEDULE_TIME, /**< Select worker that has the fastest execution time on previous tasks. */
79+
VINE_SCHEDULE_RAND, /**< Select a random worker. */
80+
VINE_SCHEDULE_WORST /**< Select the worst fit worker (the worker with more unused resources). */
8181
} vine_schedule_t;
8282

8383
/** Possible outcomes for a task, returned by @ref vine_task_get_result.

taskvine/src/manager/vine_manager.c

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ See the file COPYING for details.
2424
#include "vine_runtime_dir.h"
2525
#include "vine_schedule.h"
2626
#include "vine_task.h"
27+
#include "vine_task_groups.h"
2728
#include "vine_task_info.h"
2829
#include "vine_taskgraph_log.h"
2930
#include "vine_txn_log.h"
@@ -2992,6 +2993,35 @@ static vine_result_code_t commit_task_to_worker(struct vine_manager *q, struct v
29922993
return result;
29932994
}
29942995

2996+
static vine_result_code_t commit_task_group_to_worker(struct vine_manager *q, struct vine_worker_info *w, struct vine_task *t)
2997+
{
2998+
vine_result_code_t result = VINE_SUCCESS;
2999+
3000+
struct list *l = NULL;
3001+
if (t->group_id) {
3002+
l = itable_lookup(q->task_group_table, t->group_id);
3003+
list_remove(l, t);
3004+
// decrement refcount
3005+
vine_task_delete(t);
3006+
}
3007+
3008+
int counter = 0;
3009+
do {
3010+
3011+
if (counter && (result == VINE_SUCCESS)) {
3012+
int t_idx = priority_queue_find_idx(q->ready_tasks, t);
3013+
priority_queue_remove(q->ready_tasks, t_idx);
3014+
// decrement refcount
3015+
vine_task_delete(t);
3016+
}
3017+
result = commit_task_to_worker(q, w, t);
3018+
counter++;
3019+
} while ((l && (t = list_pop_head(l))));
3020+
3021+
debug(D_VINE, "Sent batch of %d tasks to worker %s", counter, w->hostname);
3022+
return result;
3023+
}
3024+
29953025
/* 1 if task resubmitted, 0 otherwise */
29963026
static int resubmit_task_on_exhaustion(struct vine_manager *q, struct vine_worker_info *w, struct vine_task *t)
29973027
{
@@ -3300,6 +3330,9 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v
33003330
if (!rt)
33013331
return;
33023332

3333+
/* Do not try to group recovery tasks */
3334+
rt->group_id = 0;
3335+
33033336
switch (rt->state) {
33043337
case VINE_TASK_INITIAL:
33053338
/* The recovery task has never been run, so submit it now. */
@@ -3441,7 +3474,26 @@ static int send_one_task(struct vine_manager *q)
34413474

34423475
if (w) {
34433476
priority_queue_remove(q->ready_tasks, t_idx);
3444-
vine_result_code_t result = commit_task_to_worker(q, w, t);
3477+
3478+
// do not continue if this worker is running a group task
3479+
if (q->task_groups_enabled) {
3480+
struct vine_task *it;
3481+
uint64_t taskid;
3482+
ITABLE_ITERATE(w->current_tasks, taskid, it)
3483+
{
3484+
if (it->group_id) {
3485+
return 0;
3486+
}
3487+
}
3488+
}
3489+
3490+
vine_result_code_t result;
3491+
if (q->task_groups_enabled) {
3492+
result = commit_task_group_to_worker(q, w, t);
3493+
} else {
3494+
result = commit_task_to_worker(q, w, t);
3495+
}
3496+
34453497
switch (result) {
34463498
case VINE_SUCCESS:
34473499
/* return on successful commit. */
@@ -3945,6 +3997,8 @@ struct vine_manager *vine_ssl_create(int port, const char *key, const char *cert
39453997

39463998
q->factory_table = hash_table_create(0, 0);
39473999
q->current_transfer_table = hash_table_create(0, 0);
4000+
q->task_group_table = itable_create(0);
4001+
q->group_id_counter = 1;
39484002
q->fetch_factory = 0;
39494003

39504004
q->measured_local_resources = rmsummary_create(-1);
@@ -4010,6 +4064,8 @@ struct vine_manager *vine_ssl_create(int port, const char *key, const char *cert
40104064
// peer transfers enabled by default
40114065
q->peer_transfers_enabled = 1;
40124066

4067+
q->task_groups_enabled = 0;
4068+
40134069
q->load_from_shared_fs_enabled = 0;
40144070

40154071
q->file_source_max_transfers = VINE_FILE_SOURCE_MAX_TRANSFERS;
@@ -4287,6 +4343,9 @@ void vine_delete(struct vine_manager *q)
42874343
vine_current_transfers_clear(q);
42884344
hash_table_delete(q->current_transfer_table);
42894345

4346+
vine_task_groups_clear(q);
4347+
itable_delete(q->task_group_table);
4348+
42904349
itable_clear(q->tasks, (void *)delete_task_at_exit);
42914350
itable_delete(q->tasks);
42924351

@@ -4656,6 +4715,11 @@ int vine_submit(struct vine_manager *q, struct vine_task *t)
46564715
vine_task_set_scheduler(t, VINE_SCHEDULE_FILES);
46574716
}
46584717

4718+
/* Attempt to group this task based on temp dependencies. */
4719+
if (q->task_groups_enabled) {
4720+
vine_task_groups_assign_task(q, t);
4721+
}
4722+
46594723
/* If the task produces temporary files, create recovery tasks for those. */
46604724
vine_manager_create_recovery_tasks(q, t);
46614725

@@ -5472,6 +5536,15 @@ int vine_cancel_by_task_id(struct vine_manager *q, int task_id)
54725536
return 0;
54735537
}
54745538

5539+
if (task->group_id) {
5540+
struct list *l = itable_lookup(q->task_group_table, task->group_id);
5541+
if (l) {
5542+
list_remove(l, task);
5543+
}
5544+
task->group_id = 0;
5545+
vine_task_delete(task);
5546+
}
5547+
54755548
reset_task_to_state(q, task, VINE_TASK_RETRIEVED);
54765549

54775550
task->result = VINE_RESULT_CANCELLED;
@@ -5673,6 +5746,9 @@ int vine_tune(struct vine_manager *q, const char *name, double value)
56735746
} else if (!strcmp(name, "update-interval")) {
56745747
q->update_interval = MAX(1, (int)value);
56755748

5749+
} else if (!strcmp(name, "task-groups")) {
5750+
q->task_groups_enabled = MIN(1, (int)value);
5751+
56765752
} else if (!strcmp(name, "resource-management-interval")) {
56775753
q->resource_management_interval = MAX(1, (int)value);
56785754

taskvine/src/manager/vine_manager.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ struct vine_manager {
118118
struct hash_table *workers_with_watched_file_updates; /* Maps link -> vine_worker_info */
119119
struct hash_table *workers_with_complete_tasks; /* Maps link -> vine_worker_info */
120120
struct hash_table *current_transfer_table; /* Maps uuid -> struct transfer_pair */
121+
struct itable *task_group_table; /* Maps group id -> list vine_task */
121122

122123
/* Primary data structures for tracking files. */
123124

@@ -181,6 +182,10 @@ struct vine_manager {
181182
int tasks_waiting_last_hungry; /* Number of tasks originally waiting when call to vine_hungry_computation was made. */
182183
timestamp_t hungry_check_interval; /* Maximum interval between vine_hungry_computation checks. */
183184

185+
/* Task Groups Configuration */
186+
int task_groups_enabled;
187+
int group_id_counter;
188+
184189
/* Various performance knobs that can be tuned. */
185190
int short_timeout; /* Timeout in seconds to send/recv a brief message from worker */
186191
int long_timeout; /* Timeout if in the middle of an incomplete message. */

taskvine/src/manager/vine_manager_put.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,10 @@ vine_result_code_t vine_manager_put_task(
561561
}
562562
}
563563

564+
if (t->group_id) {
565+
vine_manager_send(q, w, "groupid %d\n", t->group_id);
566+
}
567+
564568
// vine_manager_send returns the number of bytes sent, or a number less than
565569
// zero to indicate errors. We are lazy here, we only check the last
566570
// message we sent to the worker (other messages may have failed above).

taskvine/src/manager/vine_task.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ struct vine_task *vine_task_create(const char *command_line)
8181
t->priority = 0;
8282

8383
vine_counters.task.created++;
84+
t->group_id = 0;
8485

8586
return t;
8687
}
@@ -256,6 +257,11 @@ struct vine_task *vine_task_copy(const struct vine_task *task)
256257
new->resources_requested = rmsummary_copy(task->resources_requested, 0);
257258
}
258259

260+
/* Group ID is copied. */
261+
if (task->group_id) {
262+
new->group_id = task->group_id;
263+
}
264+
259265
return new;
260266
}
261267

taskvine/src/manager/vine_task.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ End user may only use the API described in taskvine.h
1818

1919
#include "list.h"
2020
#include "category.h"
21+
#include "uuid.h"
2122

2223
#include <stdint.h>
2324

@@ -130,11 +131,13 @@ struct vine_task {
130131
struct rmsummary *resources_requested; /**< Number of cores, disk, memory, time, etc. the task requires. */
131132
struct rmsummary *current_resource_box; /**< Resources allocated to the task on this specific worker. */
132133

133-
double sandbox_measured; /**< On completion, the maximum size observed of the disk used by the task for output and ephemeral files. */
134+
double sandbox_measured; /**< On completion, the maximum size observed of the disk used by the task for output and ephemeral files. */
134135

135136
int has_fixed_locations; /**< Whether at least one file was added with the VINE_FIXED_LOCATION flag. Task fails immediately if no
136137
worker can satisfy all the strict inputs of the task. */
137138

139+
int group_id; /**< When enabled, group ID will be assigned based on temp file dependencies of this task */
140+
138141
int refcount; /**< Number of remaining references to this object. */
139142
};
140143

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
Copyright (C) 2022- The University of Notre Dame
3+
This software is distributed under the GNU General Public License.
4+
See the file COPYING for details.
5+
*/
6+
7+
#include "vine_task_groups.h"
8+
#include "debug.h"
9+
#include "vine_mount.h"
10+
#include "vine_task.h"
11+
#include "stringtools.h"
12+
13+
// create a new task group for this task based on the temp mount file
14+
static int vine_task_groups_create_group(struct vine_manager *q, struct vine_task *t, struct vine_mount *m)
15+
{
16+
int id = q->group_id_counter++;
17+
struct list *l = list_create();
18+
19+
t->group_id = id;
20+
21+
struct vine_task *tc = vine_task_addref(t);
22+
23+
list_push_head(l, tc);
24+
itable_insert(q->task_group_table, id, l);
25+
return 1;
26+
}
27+
28+
// locate the group with the task which outputs the desired file, and add the new task
29+
static int vine_task_groups_add_to_group(struct vine_manager *q, struct vine_task *t, struct vine_mount *m)
30+
{
31+
int id = m->file->recovery_task->group_id;
32+
33+
if (id) {
34+
struct list *group = itable_lookup(q->task_group_table, id);
35+
t->group_id = id;
36+
struct vine_task *tc = vine_task_addref(t);
37+
list_push_tail(group, tc);
38+
}
39+
40+
return 0;
41+
}
42+
43+
/*
44+
When a task comes in through vine_submit, look for temp files in its inputs/outputs
45+
If there is a temp file on the input there is already a task group it should be assigned to.
46+
If there is only a temp output it would be the first of a new group.
47+
*/
48+
int vine_task_groups_assign_task(struct vine_manager *q, struct vine_task *t)
49+
{
50+
struct vine_mount *input_mount;
51+
struct vine_mount *output_mount;
52+
53+
int inputs_present = 0;
54+
int outputs_present = 0;
55+
56+
LIST_ITERATE(t->input_mounts, input_mount)
57+
{
58+
if (input_mount->file->type == VINE_TEMP) {
59+
inputs_present++;
60+
break;
61+
}
62+
}
63+
64+
LIST_ITERATE(t->output_mounts, output_mount)
65+
{
66+
if (output_mount->file->type == VINE_TEMP) {
67+
outputs_present++;
68+
break;
69+
}
70+
}
71+
72+
// could also be inputs_present && outputs_present
73+
if (inputs_present) {
74+
vine_task_groups_add_to_group(q, t, input_mount);
75+
debug(D_VINE, "Assigned task to group %d", t->group_id);
76+
} else if (outputs_present) {
77+
vine_task_groups_create_group(q, t, output_mount);
78+
debug(D_VINE, "Create task with group %d", t->group_id);
79+
}
80+
81+
return inputs_present || outputs_present;
82+
}
83+
84+
static void vine_task_group_delete(struct list *l)
85+
{
86+
if (l) {
87+
list_delete(l);
88+
}
89+
}
90+
91+
void vine_task_groups_clear(struct vine_manager *q)
92+
{
93+
itable_clear(q->task_group_table, (void *)vine_task_group_delete);
94+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/*
2+
Copyright (C) 2022- The University of Notre Dame
3+
This software is distributed under the GNU General Public License.
4+
See the file COPYING for details.
5+
*/
6+
7+
#include "taskvine.h"
8+
#include "vine_manager.h"
9+
#include "uuid.h"
10+
11+
12+
int vine_task_groups_assign_task(struct vine_manager *q, struct vine_task *t);
13+
14+
void vine_task_groups_clear(struct vine_manager *q);

taskvine/src/worker/vine_sandbox.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ Return VINE_STATUS_PROCESSING if some are not ready.
3535
Return VINE_STATUS_FAILED if some have definitely failed.
3636
*/
3737

38-
vine_cache_status_t vine_sandbox_ensure(struct vine_process *p, struct vine_cache *cache, struct link *manager)
38+
vine_cache_status_t vine_sandbox_ensure(struct vine_process *p, struct vine_cache *cache, struct link *manager, struct itable *procs_table)
3939
{
4040
int processing = 0;
4141

@@ -52,7 +52,30 @@ vine_cache_status_t vine_sandbox_ensure(struct vine_process *p, struct vine_cach
5252
break;
5353
case VINE_CACHE_STATUS_READY:
5454
break;
55-
case VINE_CACHE_STATUS_UNKNOWN:
55+
case VINE_CACHE_STATUS_UNKNOWN: {
56+
struct vine_process *lp;
57+
uint64_t task_id;
58+
int found_file = 0;
59+
ITABLE_ITERATE(procs_table, task_id, lp)
60+
{
61+
struct vine_mount *lm;
62+
LIST_ITERATE(lp->task->output_mounts, lm)
63+
{
64+
if (strcmp(lm->file->cached_name, m->file->cached_name) == 0) {
65+
found_file = 1;
66+
break;
67+
}
68+
}
69+
if (found_file) {
70+
break;
71+
}
72+
}
73+
if (found_file) {
74+
processing++;
75+
break;
76+
}
77+
}
78+
return VINE_CACHE_STATUS_FAILED;
5679
case VINE_CACHE_STATUS_FAILED:
5780
return VINE_CACHE_STATUS_FAILED;
5881
}

0 commit comments

Comments
 (0)