File: | src/usr.bin/tsort/tsort.c |
Warning: | line 729, column 31 Access to field 'from' results in a dereference of a null pointer (loaded from variable 't') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: tsort.c,v 1.37 2019/07/11 17:28:32 mestre Exp $ */ | |||
2 | /* ex:ts=8 sw=4: | |||
3 | * | |||
4 | * Copyright (c) 1999-2004 Marc Espie <espie@openbsd.org> | |||
5 | * | |||
6 | * Permission to use, copy, modify, and distribute this software for any | |||
7 | * purpose with or without fee is hereby granted, provided that the above | |||
8 | * copyright notice and this permission notice appear in all copies. | |||
9 | * | |||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
17 | */ | |||
18 | ||||
19 | #include <assert.h> | |||
20 | #include <ctype.h> | |||
21 | #include <err.h> | |||
22 | #include <limits.h> | |||
23 | #include <stddef.h> | |||
24 | #include <stdio.h> | |||
25 | #include <stdint.h> | |||
26 | #include <stdlib.h> | |||
27 | #include <string.h> | |||
28 | #include <unistd.h> | |||
29 | #include <ohash.h> | |||
30 | ||||
31 | /* The complexity of topological sorting is O(e), where e is the | |||
32 | * size of input. While reading input, vertices have to be identified, | |||
33 | * thus add the complexity of e keys retrieval among v keys using | |||
34 | * an appropriate data structure. This program uses open double hashing | |||
35 | * for that purpose. See Knuth for the expected complexity of double | |||
36 | * hashing (Brent variation should probably be used if v << e, as a user | |||
37 | * option). | |||
38 | * | |||
39 | * The algorithm used for longest cycle reporting is accurate, but somewhat | |||
40 | * expensive. It may need to build all free paths of the graph (a free | |||
41 | * path is a path that never goes twice through the same node), whose | |||
42 | * number can be as high as O(2^e). Usually, the number of free paths is | |||
43 | * much smaller though. This program's author does not believe that a | |||
44 | * significantly better worst-case complexity algorithm exists. | |||
45 | * | |||
46 | * In case of a hints file, the set of minimal nodes is maintained as a | |||
47 | * heap. The resulting complexity is O(e+v log v) for the worst case. | |||
48 | * The average should actually be near O(e). | |||
49 | * | |||
50 | * If the hints file is incomplete, there is some extra complexity incurred | |||
51 | * by make_transparent, which does propagate order values to unmarked | |||
52 | * nodes. In the worst case, make_transparent is O(e u), | |||
53 | * where u is the number of originally unmarked nodes. | |||
54 | * In practice, it is much faster. | |||
55 | * | |||
56 | * The simple topological sort algorithm detects cycles. This program | |||
57 | * goes further, breaking cycles through the use of simple heuristics. | |||
58 | * Each cycle break checks the whole set of nodes, hence if c cycles break | |||
59 | * are needed, this is an extra cost of O(c v). | |||
60 | * | |||
61 | * Possible heuristics are as follows: | |||
62 | * - break cycle at node with lowest number of predecessors (default case), | |||
63 | * - break longest cycle at node with lowest number of predecessors, | |||
64 | * - break cycle at next node from the hints file. | |||
65 | * | |||
66 | * Except for the hints file case, which sets an explicit constraint on | |||
67 | * which cycle to break, those heuristics locally result in the smallest | |||
68 | * number of broken edges. | |||
69 | * | |||
70 | * Those are admittedly greedy strategies, as is the selection of the next | |||
71 | * node from the hints file amongst equivalent candidates that is used for | |||
72 | * `stable' topological sorting. | |||
73 | */ | |||
74 | ||||
75 | #ifdef __GNUC__4 | |||
76 | #define UNUSED__attribute__((unused)) __attribute__((unused)) | |||
77 | #else | |||
78 | #define UNUSED__attribute__((unused)) | |||
79 | #endif | |||
80 | ||||
81 | struct node; | |||
82 | ||||
83 | /* The set of arcs from a given node is stored as a linked list. */ | |||
84 | struct link { | |||
85 | struct link *next; | |||
86 | struct node *node; | |||
87 | }; | |||
88 | ||||
89 | #define NO_ORDER0xffffffffU UINT_MAX0xffffffffU | |||
90 | ||||
91 | struct node { | |||
92 | unsigned int refs; /* Number of arcs left, coming into this node. | |||
93 | * Note that nodes with a null count can't | |||
94 | * be part of cycles. */ | |||
95 | unsigned int order; /* Order of nodes according to a hint file. */ | |||
96 | ||||
97 | struct link *arcs; /* List of forward arcs. */ | |||
98 | ||||
99 | /* Cycle detection algorithms build a free path of nodes. */ | |||
100 | struct node *from; /* Previous node in the current path. */ | |||
101 | struct link *traverse; /* Next link to traverse when backtracking. */ | |||
102 | unsigned int mark; /* Mark processed nodes in cycle discovery. */ | |||
103 | ||||
104 | char k[1]; /* Name of this node. */ | |||
105 | }; | |||
106 | ||||
107 | #define HASH_START9 9 | |||
108 | ||||
109 | struct array { | |||
110 | unsigned int entries; | |||
111 | struct node **t; | |||
112 | }; | |||
113 | ||||
114 | static void nodes_init(struct ohash *); | |||
115 | static struct node *node_lookup(struct ohash *, const char *, const char *); | |||
116 | static __dead__attribute__((__noreturn__)) void usage(void); | |||
117 | static struct node *new_node(const char *, const char *); | |||
118 | ||||
119 | static unsigned int read_pairs(FILE *, struct ohash *, int, | |||
120 | const char *, unsigned int, int); | |||
121 | static void split_nodes(struct ohash *, struct array *, struct array *); | |||
122 | static void make_transparent(struct ohash *); | |||
123 | static void insert_arc(struct node *, struct node *); | |||
124 | ||||
125 | #ifdef DEBUG | |||
126 | static void dump_node(struct node *); | |||
127 | static void dump_array(struct array *); | |||
128 | static void dump_hash(struct ohash *); | |||
129 | #endif | |||
130 | static unsigned int read_hints(FILE *, struct ohash *, int, | |||
131 | const char *, unsigned int); | |||
132 | static struct node *find_smallest_node(struct array *); | |||
133 | static struct node *find_good_cycle_break(struct array *); | |||
134 | static void print_cycle(struct array *); | |||
135 | static struct node *find_cycle_from(struct node *, struct array *); | |||
136 | static struct node *find_predecessor(struct array *, struct node *); | |||
137 | static unsigned int traverse_node(struct node *, unsigned int, struct array *); | |||
138 | static struct node *find_longest_cycle(struct array *, struct array *); | |||
139 | static struct node *find_normal_cycle(struct array *, struct array *); | |||
140 | ||||
141 | static void heap_down(struct array *, unsigned int); | |||
142 | static void heapify(struct array *, int); | |||
143 | static struct node *dequeue(struct array *); | |||
144 | static void enqueue(struct array *, struct node *); | |||
145 | ||||
146 | ||||
147 | ||||
148 | static void *hash_calloc(size_t, size_t, void *); | |||
149 | static void hash_free(void *, void *); | |||
150 | static void* entry_alloc(size_t, void *); | |||
151 | static void *ereallocarray(void *, size_t, size_t); | |||
152 | static void *emem(void *); | |||
153 | #define DEBUG_TRAVERSE0 0 | |||
154 | static struct ohash_info node_info = { | |||
155 | offsetof(struct node, k)__builtin_offsetof(struct node, k), NULL((void *)0), hash_calloc, hash_free, entry_alloc }; | |||
156 | static void parse_args(int, char *[], struct ohash *); | |||
157 | static int tsort(struct ohash *); | |||
158 | ||||
159 | static int quiet_flag, long_flag, | |||
160 | warn_flag, hints_flag, verbose_flag; | |||
161 | ||||
162 | ||||
163 | int main(int, char *[]); | |||
164 | ||||
165 | /*** | |||
166 | *** Memory handling. | |||
167 | ***/ | |||
168 | ||||
169 | static void * | |||
170 | emem(void *p) | |||
171 | { | |||
172 | if (p) | |||
173 | return p; | |||
174 | else | |||
175 | errx(1, "Memory exhausted"); | |||
176 | } | |||
177 | ||||
178 | static void * | |||
179 | hash_calloc(size_t n, size_t s, void *u UNUSED__attribute__((unused))) | |||
180 | { | |||
181 | return emem(calloc(n, s)); | |||
182 | } | |||
183 | ||||
184 | static void | |||
185 | hash_free(void *p, void *u UNUSED__attribute__((unused))) | |||
186 | { | |||
187 | free(p); | |||
188 | } | |||
189 | ||||
190 | static void * | |||
191 | entry_alloc(size_t s, void *u UNUSED__attribute__((unused))) | |||
192 | { | |||
193 | return ereallocarray(NULL((void *)0), 1, s); | |||
194 | } | |||
195 | ||||
196 | static void * | |||
197 | ereallocarray(void *p, size_t n, size_t s) | |||
198 | { | |||
199 | return emem(reallocarray(p, n, s)); | |||
200 | } | |||
201 | ||||
202 | ||||
203 | /*** | |||
204 | *** Hash table. | |||
205 | ***/ | |||
206 | ||||
207 | /* Inserting and finding nodes in the hash structure. | |||
208 | * We handle interval strings for efficiency wrt fgetln. */ | |||
209 | static struct node * | |||
210 | new_node(const char *start, const char *end) | |||
211 | { | |||
212 | struct node *n; | |||
213 | ||||
214 | n = ohash_create_entry(&node_info, start, &end); | |||
215 | n->from = NULL((void *)0); | |||
216 | n->arcs = NULL((void *)0); | |||
217 | n->refs = 0; | |||
218 | n->mark = 0; | |||
219 | n->order = NO_ORDER0xffffffffU; | |||
220 | n->traverse = NULL((void *)0); | |||
221 | return n; | |||
222 | } | |||
223 | ||||
224 | ||||
225 | static void | |||
226 | nodes_init(struct ohash *h) | |||
227 | { | |||
228 | ohash_init(h, HASH_START9, &node_info); | |||
229 | } | |||
230 | ||||
231 | static struct node * | |||
232 | node_lookup(struct ohash *h, const char *start, const char *end) | |||
233 | { | |||
234 | unsigned int i; | |||
235 | struct node * n; | |||
236 | ||||
237 | i = ohash_qlookupi(h, start, &end); | |||
238 | ||||
239 | n = ohash_find(h, i); | |||
240 | if (n == NULL((void *)0)) | |||
241 | n = ohash_insert(h, i, new_node(start, end)); | |||
242 | return n; | |||
243 | } | |||
244 | ||||
245 | #ifdef DEBUG | |||
246 | static void | |||
247 | dump_node(struct node *n) | |||
248 | { | |||
249 | struct link *l; | |||
250 | ||||
251 | if (n->refs == 0) | |||
252 | return; | |||
253 | printf("%s (%u/%u): ", n->k, n->order, n->refs); | |||
254 | for (l = n->arcs; l != NULL((void *)0); l = l->next) | |||
255 | if (n->refs != 0) | |||
256 | printf("%s(%u/%u) ", l->node->k, l->node->order, l->node->refs); | |||
257 | putchar('\n')(!__isthreaded ? __sputc('\n', (&__sF[1])) : (putc)('\n', (&__sF[1]))); | |||
258 | } | |||
259 | ||||
260 | static void | |||
261 | dump_array(struct array *a) | |||
262 | { | |||
263 | unsigned int i; | |||
264 | ||||
265 | for (i = 0; i < a->entries; i++) | |||
266 | dump_node(a->t[i]); | |||
267 | } | |||
268 | ||||
269 | static void | |||
270 | dump_hash(struct ohash *h) | |||
271 | { | |||
272 | unsigned int i; | |||
273 | struct node *n; | |||
274 | ||||
275 | for (n = ohash_first(h, &i); n != NULL((void *)0); n = ohash_next(h, &i)) | |||
276 | dump_node(n); | |||
277 | } | |||
278 | #endif | |||
279 | ||||
280 | /*** | |||
281 | *** Reading data. | |||
282 | ***/ | |||
283 | ||||
284 | static void | |||
285 | insert_arc(struct node *a, struct node *b) | |||
286 | { | |||
287 | struct link *l; | |||
288 | ||||
289 | /* Check that this arc is not already present. */ | |||
290 | for (l = a->arcs; l != NULL((void *)0); l = l->next) { | |||
291 | if (l->node == b) | |||
292 | return; | |||
293 | } | |||
294 | b->refs++; | |||
295 | l = ereallocarray(NULL((void *)0), 1, sizeof(struct link)); | |||
296 | l->node = b; | |||
297 | l->next = a->arcs; | |||
298 | a->arcs = l; | |||
299 | } | |||
300 | ||||
301 | static unsigned int | |||
302 | read_pairs(FILE *f, struct ohash *h, int reverse, const char *name, | |||
303 | unsigned int order, int hint) | |||
304 | { | |||
305 | int toggle; | |||
306 | struct node *a; | |||
307 | size_t size; | |||
308 | char *str; | |||
309 | ||||
310 | toggle = 1; | |||
311 | a = NULL((void *)0); | |||
312 | ||||
313 | while ((str = fgetln(f, &size)) != NULL((void *)0)) { | |||
314 | char *sentinel; | |||
315 | ||||
316 | sentinel = str + size; | |||
317 | for (;;) { | |||
318 | char *e; | |||
319 | ||||
320 | while (str < sentinel && | |||
321 | isspace((unsigned char)*str)) | |||
322 | str++; | |||
323 | if (str == sentinel) | |||
324 | break; | |||
325 | for (e = str; | |||
326 | e < sentinel && !isspace((unsigned char)*e); e++) | |||
327 | continue; | |||
328 | if (toggle) { | |||
329 | a = node_lookup(h, str, e); | |||
330 | if (a->order == NO_ORDER0xffffffffU && hint) | |||
331 | a->order = order++; | |||
332 | } else { | |||
333 | struct node *b; | |||
334 | ||||
335 | b = node_lookup(h, str, e); | |||
336 | assert(a != NULL)((a != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 336, __func__, "a != NULL")); | |||
337 | if (b != a) { | |||
338 | if (reverse) | |||
339 | insert_arc(b, a); | |||
340 | else | |||
341 | insert_arc(a, b); | |||
342 | } | |||
343 | } | |||
344 | toggle = !toggle; | |||
345 | str = e; | |||
346 | } | |||
347 | } | |||
348 | if (toggle == 0) | |||
349 | errx(1, "odd number of node names in %s", name); | |||
350 | if (!feof(f)(!__isthreaded ? (((f)->_flags & 0x0020) != 0) : (feof )(f))) | |||
351 | err(1, "error reading %s", name); | |||
352 | return order; | |||
353 | } | |||
354 | ||||
355 | static unsigned int | |||
356 | read_hints(FILE *f, struct ohash *h, int quiet, const char *name, | |||
357 | unsigned int order) | |||
358 | { | |||
359 | char *str; | |||
360 | size_t size; | |||
361 | ||||
362 | while ((str = fgetln(f, &size)) != NULL((void *)0)) { | |||
363 | char *sentinel; | |||
364 | ||||
365 | sentinel = str + size; | |||
366 | for (;;) { | |||
367 | char *e; | |||
368 | struct node *a; | |||
369 | ||||
370 | while (str < sentinel && isspace((unsigned char)*str)) | |||
371 | str++; | |||
372 | if (str == sentinel) | |||
373 | break; | |||
374 | for (e = str; | |||
375 | e < sentinel && !isspace((unsigned char)*e); e++) | |||
376 | continue; | |||
377 | a = node_lookup(h, str, e); | |||
378 | if (a->order != NO_ORDER0xffffffffU) { | |||
379 | if (!quiet) | |||
380 | warnx( | |||
381 | "duplicate node %s in hints file %s", | |||
382 | a->k, name); | |||
383 | } else | |||
384 | a->order = order++; | |||
385 | str = e; | |||
386 | } | |||
387 | } | |||
388 | if (!feof(f)(!__isthreaded ? (((f)->_flags & 0x0020) != 0) : (feof )(f))) | |||
389 | err(1, "error reading %s", name); | |||
390 | return order; | |||
391 | } | |||
392 | ||||
393 | ||||
394 | /*** | |||
395 | *** Standard heap handling routines. | |||
396 | ***/ | |||
397 | ||||
398 | static void | |||
399 | heap_down(struct array *h, unsigned int i) | |||
400 | { | |||
401 | unsigned int j; | |||
402 | struct node *swap; | |||
403 | ||||
404 | for (; (j=2*i+1) < h->entries; i = j) { | |||
405 | if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order) | |||
406 | j++; | |||
407 | if (h->t[i]->order <= h->t[j]->order) | |||
408 | break; | |||
409 | swap = h->t[i]; | |||
410 | h->t[i] = h->t[j]; | |||
411 | h->t[j] = swap; | |||
412 | } | |||
413 | } | |||
414 | ||||
415 | static void | |||
416 | heapify(struct array *h, int verbose) | |||
417 | { | |||
418 | unsigned int i; | |||
419 | ||||
420 | for (i = h->entries; i != 0;) { | |||
421 | if (h->t[--i]->order == NO_ORDER0xffffffffU && verbose) | |||
422 | warnx("node %s absent from hints file", h->t[i]->k); | |||
423 | heap_down(h, i); | |||
424 | } | |||
425 | } | |||
426 | ||||
427 | #define DEQUEUE(h)( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] ) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] ) | |||
428 | ||||
429 | static struct node * | |||
430 | dequeue(struct array *h) | |||
431 | { | |||
432 | struct node *n; | |||
433 | ||||
434 | if (h->entries == 0) | |||
435 | n = NULL((void *)0); | |||
436 | else { | |||
437 | n = h->t[0]; | |||
438 | if (--h->entries != 0) { | |||
439 | h->t[0] = h->t[h->entries]; | |||
440 | heap_down(h, 0); | |||
441 | } | |||
442 | } | |||
443 | return n; | |||
444 | } | |||
445 | ||||
446 | #define ENQUEUE(h, n)do { if (hints_flag) enqueue((h), (n)); else (h)->t[(h)-> entries++] = (n); } while(0); do { \ | |||
447 | if (hints_flag) \ | |||
448 | enqueue((h), (n)); \ | |||
449 | else \ | |||
450 | (h)->t[(h)->entries++] = (n); \ | |||
451 | } while(0); | |||
452 | ||||
453 | static void | |||
454 | enqueue(struct array *h, struct node *n) | |||
455 | { | |||
456 | unsigned int i, j; | |||
457 | struct node *swap; | |||
458 | ||||
459 | h->t[h->entries++] = n; | |||
460 | for (i = h->entries-1; i > 0; i = j) { | |||
461 | j = (i-1)/2; | |||
462 | if (h->t[j]->order < h->t[i]->order) | |||
463 | break; | |||
464 | swap = h->t[j]; | |||
465 | h->t[j] = h->t[i]; | |||
466 | h->t[i] = swap; | |||
467 | } | |||
468 | } | |||
469 | ||||
470 | /* Nodes without order should not hinder direct dependencies. | |||
471 | * Iterate until no nodes are left. | |||
472 | */ | |||
473 | static void | |||
474 | make_transparent(struct ohash *hash) | |||
475 | { | |||
476 | struct node *n; | |||
477 | unsigned int i; | |||
478 | struct link *l; | |||
479 | int adjusted; | |||
480 | int bad; | |||
481 | unsigned int min; | |||
482 | ||||
483 | /* first try to solve complete nodes */ | |||
484 | do { | |||
485 | adjusted = 0; | |||
486 | bad = 0; | |||
487 | for (n = ohash_first(hash, &i); n != NULL((void *)0); | |||
488 | n = ohash_next(hash, &i)) { | |||
489 | if (n->order == NO_ORDER0xffffffffU) { | |||
490 | min = NO_ORDER0xffffffffU; | |||
491 | ||||
492 | for (l = n->arcs; l != NULL((void *)0); l = l->next) { | |||
493 | /* unsolved node -> delay resolution */ | |||
494 | if (l->node->order == NO_ORDER0xffffffffU) { | |||
495 | bad = 1; | |||
496 | break; | |||
497 | } else if (l->node->order < min) | |||
498 | min = l->node->order; | |||
499 | } | |||
500 | if (min < NO_ORDER0xffffffffU && l == NULL((void *)0)) { | |||
501 | n->order = min; | |||
502 | adjusted = 1; | |||
503 | } | |||
504 | } | |||
505 | } | |||
506 | ||||
507 | } while (adjusted); | |||
508 | ||||
509 | /* then, if incomplete nodes are left, do them */ | |||
510 | if (bad) do { | |||
511 | adjusted = 0; | |||
512 | for (n = ohash_first(hash, &i); n != NULL((void *)0); | |||
513 | n = ohash_next(hash, &i)) | |||
514 | if (n->order == NO_ORDER0xffffffffU) | |||
515 | for (l = n->arcs; l != NULL((void *)0); l = l->next) | |||
516 | if (l->node->order < n->order) { | |||
517 | n->order = l->node->order; | |||
518 | adjusted = 1; | |||
519 | } | |||
520 | } while (adjusted); | |||
521 | } | |||
522 | ||||
523 | ||||
524 | /*** | |||
525 | *** Search through hash array for nodes. | |||
526 | ***/ | |||
527 | ||||
528 | /* Split nodes into unrefed nodes/live nodes. */ | |||
529 | static void | |||
530 | split_nodes(struct ohash *hash, struct array *heap, struct array *remaining) | |||
531 | { | |||
532 | ||||
533 | struct node *n; | |||
534 | unsigned int i; | |||
535 | ||||
536 | heap->t = ereallocarray(NULL((void *)0), ohash_entries(hash), | |||
537 | sizeof(struct node *)); | |||
538 | remaining->t = ereallocarray(NULL((void *)0), ohash_entries(hash), | |||
539 | sizeof(struct node *)); | |||
540 | heap->entries = 0; | |||
541 | remaining->entries = 0; | |||
542 | ||||
543 | for (n = ohash_first(hash, &i); n != NULL((void *)0); n = ohash_next(hash, &i)) { | |||
544 | if (n->refs == 0) | |||
545 | heap->t[heap->entries++] = n; | |||
546 | else | |||
547 | remaining->t[remaining->entries++] = n; | |||
548 | } | |||
549 | } | |||
550 | ||||
551 | /* Good point to break a cycle: live node with as few refs as possible. */ | |||
552 | static struct node * | |||
553 | find_good_cycle_break(struct array *h) | |||
554 | { | |||
555 | unsigned int i; | |||
556 | unsigned int best; | |||
557 | struct node *u; | |||
558 | ||||
559 | best = UINT_MAX0xffffffffU; | |||
560 | u = NULL((void *)0); | |||
561 | ||||
562 | assert(h->entries != 0)((h->entries != 0) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 562, __func__, "h->entries != 0")); | |||
563 | for (i = 0; i < h->entries; i++) { | |||
564 | struct node *n = h->t[i]; | |||
565 | /* No need to look further. */ | |||
566 | if (n->refs == 1) | |||
567 | return n; | |||
568 | if (n->refs != 0 && n->refs < best) { | |||
569 | best = n->refs; | |||
570 | u = n; | |||
571 | } | |||
572 | } | |||
573 | assert(u != NULL)((u != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 573, __func__, "u != NULL")); | |||
574 | return u; | |||
575 | } | |||
576 | ||||
577 | /* Retrieve the node with the smallest order. */ | |||
578 | static struct node * | |||
579 | find_smallest_node(struct array *h) | |||
580 | { | |||
581 | unsigned int i; | |||
582 | unsigned int best; | |||
583 | struct node *u; | |||
584 | ||||
585 | best = UINT_MAX0xffffffffU; | |||
586 | u = NULL((void *)0); | |||
587 | ||||
588 | assert(h->entries != 0)((h->entries != 0) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 588, __func__, "h->entries != 0")); | |||
589 | for (i = 0; i < h->entries; i++) { | |||
590 | struct node *n = h->t[i]; | |||
591 | if (n->refs != 0 && n->order < best) { | |||
592 | best = n->order; | |||
593 | u = n; | |||
594 | } | |||
595 | } | |||
596 | assert(u != NULL)((u != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 596, __func__, "u != NULL")); | |||
597 | return u; | |||
598 | } | |||
599 | ||||
600 | ||||
601 | /*** | |||
602 | *** Graph algorithms. | |||
603 | ***/ | |||
604 | ||||
605 | /* Explore the nodes reachable from i to find a cycle, store it in c. | |||
606 | * This may fail. */ | |||
607 | static struct node * | |||
608 | find_cycle_from(struct node *i, struct array *c) | |||
609 | { | |||
610 | struct node *n; | |||
611 | ||||
612 | n = i; | |||
613 | /* XXX Previous cycle findings may have left this pointer non-null. */ | |||
614 | i->from = NULL((void *)0); | |||
615 | ||||
616 | for (;;) { | |||
617 | /* Note that all marks are reversed before this code exits. */ | |||
618 | n->mark = 1; | |||
619 | if (n->traverse) | |||
620 | n->traverse = n->traverse->next; | |||
621 | else | |||
622 | n->traverse = n->arcs; | |||
623 | /* Skip over dead nodes. */ | |||
624 | while (n->traverse && n->traverse->node->refs == 0) | |||
625 | n->traverse = n->traverse->next; | |||
626 | if (n->traverse) { | |||
627 | struct node *go = n->traverse->node; | |||
628 | ||||
629 | if (go->mark) { | |||
630 | c->entries = 0; | |||
631 | for (; n != NULL((void *)0) && n != go; n = n->from) { | |||
632 | c->t[c->entries++] = n; | |||
633 | n->mark = 0; | |||
634 | } | |||
635 | for (; n != NULL((void *)0); n = n->from) | |||
636 | n->mark = 0; | |||
637 | c->t[c->entries++] = go; | |||
638 | return go; | |||
639 | } else { | |||
640 | go->from = n; | |||
641 | n = go; | |||
642 | } | |||
643 | } else { | |||
644 | n->mark = 0; | |||
645 | n = n->from; | |||
646 | if (n == NULL((void *)0)) | |||
647 | return NULL((void *)0); | |||
648 | } | |||
649 | } | |||
650 | } | |||
651 | ||||
652 | /* Find a live predecessor of node n. This is a slow routine, as it needs | |||
653 | * to go through the whole array, but it is not needed often. | |||
654 | */ | |||
655 | static struct node * | |||
656 | find_predecessor(struct array *a, struct node *n) | |||
657 | { | |||
658 | unsigned int i; | |||
659 | ||||
660 | for (i = 0; i < a->entries; i++) { | |||
661 | struct node *m; | |||
662 | ||||
663 | m = a->t[i]; | |||
664 | if (m->refs != 0) { | |||
665 | struct link *l; | |||
666 | ||||
667 | for (l = m->arcs; l != NULL((void *)0); l = l->next) | |||
668 | if (l->node == n) | |||
669 | return m; | |||
670 | } | |||
671 | } | |||
672 | assert(1 == 0)((1 == 0) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 672, __func__, "1 == 0")); | |||
673 | return NULL((void *)0); | |||
674 | } | |||
675 | ||||
676 | /* Traverse all strongly connected components reachable from node n. | |||
677 | Start numbering them at o. Return the maximum order reached. | |||
678 | Update the largest cycle found so far. | |||
679 | */ | |||
680 | static unsigned int | |||
681 | traverse_node(struct node *n, unsigned int o, struct array *c) | |||
682 | { | |||
683 | unsigned int min, max; | |||
684 | ||||
685 | n->from = NULL((void *)0); | |||
686 | min = o; | |||
687 | max = ++o; | |||
688 | ||||
689 | for (;;) { | |||
690 | n->mark = o; | |||
691 | if (DEBUG_TRAVERSE0) | |||
692 | printf("%s(%u) ", n->k, n->mark); | |||
693 | /* Find next arc to explore. */ | |||
694 | if (n->traverse) | |||
695 | n->traverse = n->traverse->next; | |||
696 | else | |||
697 | n->traverse = n->arcs; | |||
698 | /* Skip over dead nodes. */ | |||
699 | while (n->traverse && n->traverse->node->refs == 0) | |||
700 | n->traverse = n->traverse->next; | |||
701 | /* If arc left. */ | |||
702 | if (n->traverse
| |||
703 | struct node *go; | |||
704 | ||||
705 | go = n->traverse->node; | |||
706 | /* Optimisation: if go->mark < min, we already | |||
707 | * visited this strongly-connected component in | |||
708 | * a previous pass. Hence, this can yield no new | |||
709 | * cycle. */ | |||
710 | ||||
711 | /* Not part of the current path: go for it. */ | |||
712 | if (go->mark == 0 || go->mark == min) { | |||
713 | go->from = n; | |||
714 | n = go; | |||
715 | o++; | |||
716 | if (o > max) | |||
717 | max = o; | |||
718 | /* Part of the current path: check cycle length. */ | |||
719 | } else if (go->mark > min
| |||
720 | if (DEBUG_TRAVERSE0) | |||
721 | printf("%d\n", o - go->mark + 1); | |||
722 | if (o - go->mark + 1 > c->entries) { | |||
723 | struct node *t; | |||
724 | unsigned int i; | |||
725 | ||||
726 | c->entries = o - go->mark + 1; | |||
727 | i = 0; | |||
728 | c->t[i++] = go; | |||
729 | for (t = n; t
| |||
| ||||
730 | c->t[i++] = t; | |||
731 | } | |||
732 | } | |||
733 | ||||
734 | /* No arc left: backtrack. */ | |||
735 | } else { | |||
736 | n->mark = min; | |||
737 | n = n->from; | |||
738 | if (!n) | |||
739 | return max; | |||
740 | o--; | |||
741 | } | |||
742 | } | |||
743 | } | |||
744 | ||||
745 | static void | |||
746 | print_cycle(struct array *c) | |||
747 | { | |||
748 | unsigned int i; | |||
749 | ||||
750 | /* Printing in reverse order, since cycle discoveries finds reverse | |||
751 | * edges. */ | |||
752 | for (i = c->entries; i != 0;) { | |||
753 | i--; | |||
754 | warnx("%s", c->t[i]->k); | |||
755 | } | |||
756 | } | |||
757 | ||||
758 | static struct node * | |||
759 | find_longest_cycle(struct array *h, struct array *c) | |||
760 | { | |||
761 | unsigned int i; | |||
762 | unsigned int o; | |||
763 | unsigned int best; | |||
764 | struct node *n; | |||
765 | static int notfirst = 0; | |||
766 | ||||
767 | assert(h->entries != 0)((h->entries != 0) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 767, __func__, "h->entries != 0")); | |||
768 | ||||
769 | /* No cycle found yet. */ | |||
770 | c->entries = 0; | |||
771 | ||||
772 | /* Reset the set of marks, except the first time around. */ | |||
773 | if (notfirst
| |||
774 | for (i = 0; i < h->entries; i++) | |||
775 | h->t[i]->mark = 0; | |||
776 | } else | |||
777 | notfirst = 1; | |||
778 | ||||
779 | o = 0; | |||
780 | ||||
781 | /* Traverse the array. Each unmarked, live node heralds a | |||
782 | * new set of strongly connected components. */ | |||
783 | for (i = 0; i < h->entries; i++) { | |||
784 | n = h->t[i]; | |||
785 | if (n->refs
| |||
786 | /* Each call to traverse_node uses a separate | |||
787 | * interval of numbers to mark nodes. */ | |||
788 | o++; | |||
789 | o = traverse_node(n, o, c); | |||
790 | } | |||
791 | } | |||
792 | ||||
793 | assert(c->entries != 0)((c->entries != 0) ? (void)0 : __assert2("/usr/src/usr.bin/tsort/tsort.c" , 793, __func__, "c->entries != 0")); | |||
794 | n = c->t[0]; | |||
795 | best = n->refs; | |||
796 | for (i = 0; i < c->entries; i++) { | |||
797 | if (c->t[i]->refs < best) { | |||
798 | n = c->t[i]; | |||
799 | best = n->refs; | |||
800 | } | |||
801 | } | |||
802 | return n; | |||
803 | } | |||
804 | ||||
805 | static struct node * | |||
806 | find_normal_cycle(struct array *h, struct array *c) | |||
807 | { | |||
808 | struct node *b, *n; | |||
809 | ||||
810 | if (hints_flag) | |||
811 | n = find_smallest_node(h); | |||
812 | else | |||
813 | n = find_good_cycle_break(h); | |||
814 | while ((b = find_cycle_from(n, c)) == NULL((void *)0)) | |||
815 | n = find_predecessor(h, n); | |||
816 | return b; | |||
817 | } | |||
818 | ||||
819 | ||||
820 | #define plural(n)((n) > 1 ? "s" : "") ((n) > 1 ? "s" : "") | |||
821 | ||||
822 | static void | |||
823 | parse_args(int argc, char *argv[], struct ohash *pairs) | |||
824 | { | |||
825 | int c; | |||
826 | unsigned int order; | |||
827 | int reverse_flag; | |||
828 | const char **files; | |||
829 | int i, j; | |||
830 | ||||
831 | i = 0; | |||
832 | ||||
833 | reverse_flag = quiet_flag = long_flag = | |||
834 | warn_flag = hints_flag = verbose_flag = 0; | |||
835 | /* argc is good enough, as we start at argv[1] */ | |||
836 | files = ereallocarray(NULL((void *)0), argc, sizeof (char *)); | |||
837 | while ((c = getopt(argc, argv, "h:flqrvw")) != -1) { | |||
838 | switch(c) { | |||
839 | case 'h': | |||
840 | files[i++] = optarg; | |||
841 | hints_flag = 1; | |||
842 | break; | |||
843 | /*FALLTHRU*/ | |||
844 | case 'f': | |||
845 | hints_flag = 2; | |||
846 | break; | |||
847 | case 'l': | |||
848 | long_flag = 1; | |||
849 | break; | |||
850 | case 'q': | |||
851 | quiet_flag = 1; | |||
852 | break; | |||
853 | case 'r': | |||
854 | reverse_flag = 1; | |||
855 | break; | |||
856 | case 'v': | |||
857 | verbose_flag = 1; | |||
858 | break; | |||
859 | case 'w': | |||
860 | warn_flag = 1; | |||
861 | break; | |||
862 | default: | |||
863 | usage(); | |||
864 | } | |||
865 | } | |||
866 | ||||
867 | argc -= optind; | |||
868 | argv += optind; | |||
869 | ||||
870 | switch(argc) { | |||
871 | case 1: | |||
872 | files[i++] = argv[0]; | |||
873 | break; | |||
874 | case 0: | |||
875 | break; | |||
876 | default: | |||
877 | usage(); | |||
878 | } | |||
879 | ||||
880 | files[i] = NULL((void *)0); | |||
881 | ||||
882 | nodes_init(pairs); | |||
883 | order = 0; | |||
884 | ||||
885 | for (j = 0; j != i-argc; j++) { | |||
886 | FILE *f; | |||
887 | ||||
888 | f = fopen(files[j], "r"); | |||
889 | if (f == NULL((void *)0)) | |||
890 | err(1, "Can't open hint file %s", files[i]); | |||
891 | order = read_hints(f, pairs, quiet_flag, files[i], order); | |||
892 | fclose(f); | |||
893 | } | |||
894 | free(files); | |||
895 | ||||
896 | if (argc == 1) { | |||
897 | FILE *f; | |||
898 | ||||
899 | f = fopen(argv[0], "r"); | |||
900 | if (f == NULL((void *)0)) | |||
901 | err(1, "Can't open file %s", argv[0]); | |||
902 | order = read_pairs(f, pairs, reverse_flag, argv[0], order, | |||
903 | hints_flag == 2); | |||
904 | fclose(f); | |||
905 | } else { | |||
906 | order = read_pairs(stdin(&__sF[0]), pairs, reverse_flag, "stdin", | |||
907 | order, hints_flag == 2); | |||
908 | } | |||
909 | } | |||
910 | ||||
911 | static int | |||
912 | tsort(struct ohash *pairs) | |||
913 | { | |||
914 | struct array aux; /* Unrefed nodes/cycle reporting. */ | |||
915 | struct array remaining; | |||
916 | unsigned int broken_arcs, broken_cycles; | |||
917 | unsigned int left; | |||
918 | ||||
919 | broken_arcs = 0; | |||
920 | broken_cycles = 0; | |||
921 | ||||
922 | if (hints_flag) | |||
923 | make_transparent(pairs); | |||
924 | split_nodes(pairs, &aux, &remaining); | |||
925 | ohash_delete(pairs); | |||
926 | ||||
927 | if (hints_flag
| |||
928 | heapify(&aux, verbose_flag); | |||
929 | ||||
930 | left = remaining.entries + aux.entries; | |||
931 | while (left != 0) { | |||
932 | ||||
933 | /* Standard topological sort. */ | |||
934 | while (aux.entries) { | |||
935 | struct link *l; | |||
936 | struct node *n; | |||
937 | ||||
938 | n = DEQUEUE(&aux)( hints_flag ? dequeue(&aux) : (&aux)->t[--(&aux )->entries] ); | |||
939 | printf("%s\n", n->k); | |||
940 | left--; | |||
941 | /* We can't free nodes, as we don't know which | |||
942 | * entry we can remove in the hash table. We | |||
943 | * rely on refs == 0 to recognize live nodes. | |||
944 | * Decrease ref count of live nodes, enter new | |||
945 | * candidates into the unrefed list. */ | |||
946 | for (l = n->arcs; l != NULL((void *)0); l = l->next) | |||
947 | if (l->node->refs != 0 && | |||
948 | --l->node->refs == 0) { | |||
949 | ENQUEUE(&aux, l->node)do { if (hints_flag) enqueue((&aux), (l->node)); else ( &aux)->t[(&aux)->entries++] = (l->node); } while (0);; | |||
950 | } | |||
951 | } | |||
952 | /* There are still cycles to break. */ | |||
953 | if (left
| |||
954 | struct node *n; | |||
955 | ||||
956 | broken_cycles++; | |||
957 | /* XXX Simple cycle detection and long cycle | |||
958 | * detection are mutually exclusive. */ | |||
959 | ||||
960 | if (long_flag) | |||
961 | n = find_longest_cycle(&remaining, &aux); | |||
962 | else | |||
963 | n = find_normal_cycle(&remaining, &aux); | |||
964 | ||||
965 | if (!quiet_flag) { | |||
966 | warnx("cycle in data"); | |||
967 | print_cycle(&aux); | |||
968 | } | |||
969 | ||||
970 | if (verbose_flag) | |||
971 | warnx("%u edge%s broken", n->refs, | |||
972 | plural(n->refs)((n->refs) > 1 ? "s" : "")); | |||
973 | broken_arcs += n->refs; | |||
974 | n->refs = 0; | |||
975 | /* Reinitialization, cycle reporting uses aux. */ | |||
976 | aux.t[0] = n; | |||
977 | aux.entries = 1; | |||
978 | } | |||
979 | } | |||
980 | if (verbose_flag && broken_cycles != 0) | |||
981 | warnx("%u cycle%s broken, for a total of %u edge%s", | |||
982 | broken_cycles, plural(broken_cycles)((broken_cycles) > 1 ? "s" : ""), | |||
983 | broken_arcs, plural(broken_arcs)((broken_arcs) > 1 ? "s" : "")); | |||
984 | if (warn_flag) | |||
985 | return (broken_cycles < 256 ? broken_cycles : 255); | |||
986 | else | |||
987 | return (0); | |||
988 | } | |||
989 | ||||
990 | int | |||
991 | main(int argc, char *argv[]) | |||
992 | { | |||
993 | struct ohash pairs; | |||
994 | ||||
995 | if (pledge("stdio rpath", NULL((void *)0)) == -1) | |||
| ||||
996 | err(1, "pledge"); | |||
997 | ||||
998 | parse_args(argc, argv, &pairs); | |||
999 | ||||
1000 | if (pledge("stdio", NULL((void *)0)) == -1) | |||
1001 | err(1, "pledge"); | |||
1002 | ||||
1003 | return tsort(&pairs); | |||
1004 | } | |||
1005 | ||||
1006 | ||||
1007 | extern char *__progname; | |||
1008 | ||||
1009 | static void | |||
1010 | usage(void) | |||
1011 | { | |||
1012 | fprintf(stderr(&__sF[2]), "Usage: %s [-flqrvw] [-h file] [file]\n", __progname); | |||
1013 | exit(1); | |||
1014 | } |