"Fossies" - the Fresh Open Source Software Archive 
Member "tin-2.6.2/src/refs.c" (9 Dec 2022, 27779 Bytes) of package /linux/misc/tin-2.6.2.tar.xz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "refs.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.6.1_vs_2.6.2.
1 /*
2 * Project : tin - a Usenet reader
3 * Module : refs.c
4 * Author : Jason Faultless <jason@altarstone.com>
5 * Created : 1996-05-09
6 * Updated : 2022-02-19
7 * Notes : Caching of message ids / References based threading
8 * Credits : Richard Hodson <richard@macgyver.tele2.co.uk>
9 * hash_msgid, free_msgid
10 *
11 * Copyright (c) 1996-2023 Jason Faultless <jason@altarstone.com>
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright notice,
19 * this list of conditions and the following disclaimer.
20 *
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 *
25 * 3. Neither the name of the copyright holder nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42
43 #ifndef TIN_H
44 # include "tin.h"
45 #endif /* !TIN_H */
46
47 #define MAX_REFS 100 /* Limit recursion depth */
48 #define REF_SEP " \t" /* Separator chars in ref headers */
49
50 #ifdef DEBUG
51 # define DEBUG_PRINT(x) if (dbgfd != NULL) fprintf x
52 static FILE *dbgfd;
53 #endif /* DEBUG */
54
55 /*
56 * local prototypes
57 */
58 static char *_get_references(struct t_msgid *refptr, int depth);
59 static struct t_msgid *add_msgid(int key, const char *msgid, struct t_msgid *newparent);
60 static struct t_msgid *find_next(struct t_msgid *ptr);
61 static struct t_msgid *parse_references(char *r);
62 static t_bool valid_msgid(char *msgid);
63 static unsigned int hash_msgid(const char *key);
64 static void add_to_parent(struct t_msgid *ptr);
65 static void build_thread(struct t_msgid *ptr);
66 static void rearrange_siblings(void);
67 #ifdef DEBUG
68 static void dump_msgid_thread(struct t_msgid *ptr, int level);
69 static void dump_msgid_threads(void);
70 #endif /* DEBUG */
71 #if 0
72 static void dump_msgids(void);
73 static void dump_thread(FILE *fp, struct t_msgid *msgid, int level);
74 #endif /* 0 */
75
76 /*
77 * The msgids are all hashed into a big array, with overspill
78 */
79 static struct t_msgid *msgids[MSGID_HASH_SIZE] = {0};
80
81 /*
82 * This part of the code deals with the caching and retrieval
83 * of Message-ID and References headers
84 *
85 * Rationale:
86 * Even though the Message-ID is unique to an article, the References
87 * field contains msgids from elsewhere in the group. As the expiry
88 * period increases, so does the redundancy of data.
89 * At the time of writing, comp.os.ms-windows.advocacy held ~850
90 * articles. The references fields contained 192k of text, of which
91 * 169k was saved using the new caching.
92 *
93 * When threading on Refs, a much better view of the original thread
94 * can be built up using this data, and threading is much faster
95 * because all the article relationships are automatically available
96 * to us.
97 *
98 * NB: We don't cache msgids from the filter file.
99 */
100
101 /*
102 * Hash a message id. A msgid is of the form <unique@sitename>
103 * (But badly broken message id's do occur)
104 * We hash on the unique portion which should have good randomness in
105 * the lower 5 bits. Propagate the random bits up with a shift, and
106 * mix in the new bits with an exclusive or.
107 *
108 * This should generate about 5+strlen(string_start) bits of randomness.
109 * MSGID_HASH_SIZE is a prime of order 2^11
110 */
111 static unsigned int
112 hash_msgid(
113 const char *key)
114 {
115 unsigned int hash = 0;
116
117 while (*key && *key != '@') {
118 hash = (hash << 1) ^ (unsigned) *key;
119 ++key;
120 }
121
122 hash %= MSGID_HASH_SIZE;
123
124 return hash;
125 }
126
127
128 /*
129 * Thread us into our parents' list of children.
130 */
131 static void
132 add_to_parent(
133 struct t_msgid *ptr)
134 {
135 struct t_msgid *p;
136
137 if (!ptr->parent)
138 return;
139
140 /*
141 * Trivial case - if we are the first child (followup)
142 */
143 if (ptr->parent->child == NULL) {
144 ptr->parent->child = ptr;
145 return;
146 }
147
148 /*
149 * Add this followup to the sibling chain of our parent.
150 * We add at the end of the chain, rearrange_siblings() will
151 * sort it later.
152 */
153 for (p = ptr->parent->child; p->sibling != NULL; p = p->sibling)
154 ;
155
156 /* ptr->sibling is already NULL */
157 p->sibling = ptr;
158 }
159
160
161 /*
162 * Checks if Message-ID has valid format
163 * Returns TRUE if it does, FALSE if it does not
164 *
165 * TODO: combine with post.c:damaged_id()
166 */
167 static t_bool
168 valid_msgid(
169 char *msgid)
170 {
171 size_t mlen = 0;
172 t_bool at_present = FALSE;
173
174 str_trim(msgid);
175 if (!msgid || *msgid != '<')
176 return FALSE;
177
178 while (isascii((unsigned char) *msgid) && isgraph((unsigned char) *msgid) && !iscntrl((unsigned char) *msgid) && *msgid != '>') {
179 if (*msgid == '@')
180 at_present = TRUE;
181 mlen++;
182 msgid++;
183 }
184
185 if (!at_present || (*msgid != '>') || mlen <= 2 /* || mlen > 250 */|| *(msgid + 1))
186 return FALSE;
187
188 return TRUE;
189 }
190
191
192 /*
193 * Adds or updates a message id in the cache.
194 * We return a ptr to the msgid, whether located or newly created.
195 *
196 * . If the message id is new, add it to the cache, creating parent, child
197 * & sibling ptrs if a parent is given.
198 *
199 * . If the message id is a duplicate, then:
200 * a) If no parent or the same parent, is given, no action is needed.
201 *
202 * b) If a parent is specified and the current parent is NULL, then
203 * add in the new parent and create child/sibling ptrs.
204 * Because we add Message-ID headers first, we don't have to worry
205 * about bogus Reference headers messing things up.
206 *
207 * c) If a conflicting parent is given:
208 *
209 * If (key == REF_REF) ignore the error - probably the refs
210 * headers are broken or have been truncated.
211 *
212 * Otherwise we have a genuine problem, two articles in one group
213 * with identical Message-IDs. This is indicative of a broken
214 * overview database.
215 */
216 static struct t_msgid *
217 add_msgid(
218 int key,
219 const char *msgid,
220 struct t_msgid *newparent)
221 {
222 struct t_msgid *ptr;
223 struct t_msgid *i;
224 unsigned int h;
225
226 if (!msgid) {
227 error_message(2, "add_msgid: NULL msgid\n");
228 free_all_arrays();
229 giveup();
230 }
231
232 h = hash_msgid(msgid + 1); /* Don't hash the initial '<' */
233
234 #ifdef DEBUG
235 if (debug & DEBUG_REFS)
236 DEBUG_PRINT((dbgfd, "---------------- Add %s %s with parent %s\n", (key == MSGID_REF) ? "MSG" : "REF", msgid, (newparent == NULL) ? _("unchanged") : newparent->txt));
237 #endif /* DEBUG */
238
239 /*
240 * Look for this message id in the cache.
241 * Broken software will sometimes damage the case of a message-id.
242 */
243 for (i = msgids[h]; i != NULL; i = i->next) {
244 if (strcasecmp(i->txt, msgid) != 0) /* No match yet */
245 continue;
246
247 /*
248 * CASE 1a - No parent specified, do nothing
249 */
250 if (newparent == NULL) {
251 #ifdef DEBUG
252 if (debug & DEBUG_REFS)
253 DEBUG_PRINT((dbgfd, "nop: %s No parent specified\n", i->txt));
254 #endif /* DEBUG */
255 return i;
256 }
257
258 /*
259 * CASE 1b - Parent not changed, do nothing
260 */
261 if (newparent == i->parent) {
262 #ifdef DEBUG
263 if (debug & DEBUG_REFS)
264 DEBUG_PRINT((dbgfd, "dup: %s -> %s (no change)\n", i->txt, i->parent ? i->parent->txt : "NULL"));
265 #endif /* DEBUG */
266 return i;
267 }
268
269 /*
270 * CASE2 - A parent has been given where there was none before.
271 * Change parent from null -> not-null & update ptrs
272 */
273 if (i->parent == NULL) {
274 /*
275 * Detect & ignore circular reference paths by looking for the
276 * new parent in this thread
277 */
278 for (ptr = newparent; ptr != NULL; ptr = ptr->parent) {
279 if (ptr == i) {
280 #ifdef DEBUG
281 if (debug & DEBUG_REFS)
282 DEBUG_PRINT((dbgfd, "Avoiding circular reference! (%s)\n", (key == MSGID_REF) ? "MSG" : "REF"));
283 #endif /* DEBUG */
284 return i;
285 }
286 }
287
288 i->parent = newparent;
289 add_to_parent(i);
290 #ifdef DEBUG
291 if (debug & DEBUG_REFS)
292 DEBUG_PRINT((dbgfd, "set: %s -> %s\n", i->txt, newparent ? newparent->txt : _("None")));
293 #endif /* DEBUG */
294 return i;
295 }
296
297 /*
298 * CASE 3 - A new parent has been given that conflicts with the
299 * current one. This is caused by
300 * 1) A duplicate Message-ID in the spool (very bad !)
301 * 2) corrupt References header
302 * All we can do is ignore the error
303 */
304 if (i->parent != newparent) {
305 #ifdef DEBUG
306 if (debug & DEBUG_REFS)
307 DEBUG_PRINT((dbgfd, "Warning: (%s) Ignoring %s -> %s (already %s)\n",
308 (key == MSGID_REF) ? "MSG" : "REF", i->txt,
309 newparent ? newparent->txt : "None", i->parent->txt));
310 #endif /* DEBUG */
311
312 return i;
313 }
314
315 error_message(2, "Error: Impossible combination of conditions!\n");
316 return i;
317 }
318
319 #ifdef DEBUG
320 if (debug & DEBUG_REFS)
321 DEBUG_PRINT((dbgfd, "new: %s -> %s\n", msgid, (newparent) ? newparent->txt : "None"));
322 #endif /* DEBUG */
323
324 /*
325 * This is a new node, so build a structure for it
326 */
327 ptr = my_malloc(sizeof(struct t_msgid) + strlen(msgid));
328
329 strcpy(ptr->txt, msgid);
330 ptr->parent = newparent;
331 ptr->child = ptr->sibling = NULL;
332 ptr->article = (key == MSGID_REF ? top_art : ART_UNAVAILABLE);
333
334 add_to_parent(ptr);
335
336 /*
337 * Insert at head of list for speed.
338 */
339 ptr->next = msgids[h];
340 msgids[h] = ptr;
341
342 return ptr;
343 }
344
345
346 /*
347 * Find a Message-ID in the cache. Return ptr to this node, or NULL if
348 * not found.
349 */
350 struct t_msgid *
351 find_msgid(
352 const char *msgid)
353 {
354 unsigned int h;
355 struct t_msgid *i;
356
357 h = hash_msgid(msgid + 1); /* Don't hash the initial '<' */
358
359 /*
360 * Look for this message id in the cache.
361 * Broken software will sometimes damage the case of a message-id.
362 */
363 for (i = msgids[h]; i != NULL; i = i->next) {
364 if (strcasecmp(i->txt, msgid) == 0) /* Found it */
365 return i;
366 }
367
368 return NULL;
369 }
370
371
372 /*
373 * Take a raw line of references data and return a ptr to a linked list of
374 * msgids, starting with the most recent entry. (Thus the list is reversed)
375 * Following the parent ptrs leads us back to the start of the thread.
376 *
377 * We iterate through the refs, adding each to the msgid cache, with
378 * the previous ref as the parent.
379 * The space saving vs. storing the refs as a single string is significant.
380 */
381 static struct t_msgid *
382 parse_references(
383 char *r)
384 {
385 char *ptr;
386 struct t_msgid *parent, *current;
387
388 if (!r)
389 return NULL;
390
391 #ifdef DEBUG
392 if (debug & DEBUG_REFS)
393 DEBUG_PRINT((dbgfd, "parse_references: %s\n", r));
394 #endif /* DEBUG */
395
396 /*
397 * Break the refs down, using REF_SEP as delimiters
398 */
399 if ((ptr = strtok(r, REF_SEP)) == NULL)
400 return NULL;
401
402 /*
403 * As per RFC 5536, a leading comment is allowed -> skip unknown
404 * token until we find a valid msgid
405 *
406 * TODO: parse these tokens to be sure it is a comment and not
407 * a damaged header
408 */
409 if (!valid_msgid(ptr)) {
410 while ((ptr = strtok(NULL, REF_SEP)) != NULL && !valid_msgid(ptr))
411 ;
412 }
413
414 if (ptr == NULL)
415 return NULL;
416
417 /*
418 * By definition, the head of the thread has no parent
419 */
420 parent = NULL;
421
422 current = add_msgid(REF_REF, ptr, parent);
423
424 while ((ptr = strtok(NULL, REF_SEP)) != NULL) {
425 if (valid_msgid(ptr)) {
426 parent = current;
427 current = add_msgid(REF_REF, ptr, parent);
428 }
429 }
430
431 return current;
432 }
433
434
435 static void
436 rearrange_siblings(
437 void)
438 {
439 int i;
440 struct t_msgid *current, *p1, *p2;
441
442 for_each_art(i) {
443 current = arts[i].refptr;
444
445 if (!current)
446 continue;
447
448 for (; current->sibling == NULL && current->parent != NULL; current = current->parent)
449 ;
450
451 if (current->sibling != NULL) {
452 for (p1 = current; p1->article == ART_UNAVAILABLE && p1->child != NULL; p1 = p1->child)
453 ;
454
455 for (p2 = current->sibling; p2->article == ART_UNAVAILABLE && p2->child != NULL; p2 = p2->child)
456 ;
457
458 if (p1->article != ART_UNAVAILABLE && p2->article != ART_UNAVAILABLE && p1->article > p2->article) {
459 if (current->parent->child == current) {
460 /*
461 * current is the first followup
462 * adjust parent->child pointer
463 */
464 current->parent->child = current->sibling;
465 p1 = current->parent->child;
466 } else {
467 /*
468 * current is not the first followup
469 * find the sibling above current
470 * adjust the sibling pointer there
471 */
472 for (p1 = current->parent->child; p1->sibling != current; p1 = p1->sibling)
473 ;
474
475 p1->sibling = current->sibling;
476 p1 = p1->sibling;
477 }
478 /*
479 * swap current <-> sibling
480 */
481 current->sibling = p1->sibling;
482 p1->sibling = current;
483 }
484 }
485 }
486 }
487
488
489 /*
490 * Reconstruct the References: field from the parent pointers
491 * NB: In deep threads this can lead to a very long line. If you want to use
492 * this function to build a Reference: line for posting be aware that the
493 * server might refuse long lines -- short it accordingly!
494 */
495 static char *
496 _get_references(
497 struct t_msgid *refptr,
498 int depth)
499 {
500 char *refs;
501 static size_t len; /* Accumulated size */
502 static size_t pos; /* current insertion position */
503
504 if (depth == 1)
505 len = 0;
506
507 len += strlen(refptr->txt) + 1; /* msgid + space */
508 if (refptr->parent == NULL || depth > MAX_REFS) {
509
510 #ifdef DEBUG
511 if (debug & DEBUG_REFS) {
512 if (depth > MAX_REFS)
513 error_message(2, "Warning: Too many refs near to %s. Truncated\n", refptr->txt);
514 }
515 #endif /* DEBUG */
516 refs = my_malloc(len + 1); /* total length + nullbyte */
517 pos = 0;
518 } else
519 refs = _get_references(refptr->parent, depth + 1);
520
521 sprintf(refs + pos, "%s ", refptr->txt);
522 pos = strlen(refs);
523
524 return refs;
525 }
526
527
528 /*
529 * A wrapper to the above, null terminate the string
530 */
531 char *
532 get_references(
533 struct t_msgid *refptr)
534 {
535 char *refs;
536 size_t len;
537
538 if (refptr == NULL)
539 return NULL;
540
541 refs = _get_references(refptr, 1);
542 len = strlen(refs);
543 refs[len - 1] = '\0';
544
545 return refs;
546 }
547
548
549 /*
550 * Clear the entire msgid cache, freeing up all chains. This is
551 * normally only needed when entering a new group
552 */
553 void
554 free_msgids(
555 void)
556 {
557 int i;
558 struct t_msgid *ptr, *next, **msgptr;
559
560 msgptr = msgids; /* first list */
561
562 for (i = MSGID_HASH_SIZE - 1; i >= 0; i--) { /* count down is faster */
563 ptr = *msgptr;
564 *msgptr++ = NULL; /* declare list empty */
565
566 while (ptr != NULL) { /* for each node in the list */
567
568 next = ptr->next; /* grab ptr before we free node */
569 free(ptr); /* release node */
570
571 ptr = next; /* hop down the chain */
572 }
573 }
574 }
575
576
577 #if 0 /* But please don't remove it */
578 /*
579 * Function to dump an ASCII tree map of a thread rooted at msgid.
580 * Output goes to fp, level is the current depth of the tree.
581 */
582 static void
583 dump_thread(
584 FILE *fp,
585 struct t_msgid *msgid,
586 int level)
587 {
588 char buff[120]; /* This is _probably_ enough */
589 char *ptr = buff;
590 int i, len;
591
592 /*
593 * Dump the current article
594 */
595 sprintf(ptr, "%3d %*s", msgid->article, 2 * level, " ");
596
597 len = strlen(ptr);
598 i = cCOLS - len - 20;
599
600 if (msgid->article >= 0)
601 sprintf(ptr + len, "%-*.*s %-17.17s", i, i, arts[msgid->article].subject, (arts[msgid->article].name) ? arts[msgid->article].name : arts[msgid->article].from);
602 else
603 sprintf(ptr + len, "%-*.*s", i, i, _("[- Unavailable -]"));
604
605 fprintf(fp, "%s\n", ptr);
606
607 if (msgid->child != NULL)
608 dump_thread(fp, msgid->child, level + 1);
609
610 if (msgid->sibling != NULL)
611 dump_thread(fp, msgid->sibling, level);
612
613 return;
614 }
615
616
617 static void
618 dump_msgids(
619 void)
620 {
621 int i;
622 struct t_msgid *ptr;
623
624 my_fprintf(stderr, "Dumping...\n");
625
626 for (i = 0; i < MSGID_HASH_SIZE; i++) {
627 if (msgids[i] != NULL) {
628 my_fprintf(stderr, "node %d", i);
629 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next)
630 my_fprintf(stderr, " -> %s", ptr->txt);
631
632 my_fprintf(stderr, "\n");
633
634 }
635 }
636 }
637 #endif /* 0 */
638
639
640 /*
641 * The rest of this code deals with reference threading
642 *
643 * Legend:
644 *
645 * . When a new thread is started, the root message will have no
646 * References: field
647 *
648 * . When a followup is posted, the message-id that was referred to
649 * will be appended to the References: field. If no References:
650 * field exists, a new one will be created, containing the single
651 * message-id
652 *
653 * . The References: field should not be truncated, though in practice
654 * this will happen, often in badly broken ways.
655 *
656 * This is simplistic, so check out RFC 1036 & son of RFC 1036 for full
657 * details from the posting point of view.
658 *
659 * We attempt to maintain 3 pointers in each message-id to handle threading
660 * on References:
661 *
662 * 1) parent - the article that the current one was in reply to
663 * An article with no References: has no parent, therefore
664 * it is the root of a thread.
665 *
666 * 2) sibling - the next reply in sequence to parent.
667 *
668 * 3) child - the first reply to the current article.
669 *
670 * These pointers are automatically set up when we read in the
671 * headers for a group.
672 *
673 * It remains for us to fill in the .thread and .prev ptrs in
674 * each article that exists in the spool, using the intelligence of
675 * the reference tree to locate the 'next' article in the thread.
676 *
677 */
678 /*
679 * Clear out all the article fields from the msgid hash prior to a
680 * rethread.
681 */
682 void
683 clear_art_ptrs(
684 void)
685 {
686 int i;
687 struct t_msgid *ptr;
688
689 for (i = MSGID_HASH_SIZE - 1; i >= 0; i--) {
690 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next)
691 ptr->article = ART_UNAVAILABLE;
692 }
693 }
694
695
696 #ifdef DEBUG
697 /*
698 * Dump out all the threads from the msgid point of view, show the
699 * related article index in arts[] where possible
700 * A thread is defined as a starting article with no parent
701 */
702 static void
703 dump_msgid_thread(
704 struct t_msgid *ptr,
705 int level)
706 {
707 fprintf(dbgfd, "%*s %s (%d)\n", level * 3, " ", ptr->txt, ptr->article);
708
709 if (ptr->child != NULL)
710 dump_msgid_thread(ptr->child, level + 1);
711
712 if (ptr->sibling != NULL)
713 dump_msgid_thread(ptr->sibling, level);
714
715 return;
716 }
717
718
719 static void
720 dump_msgid_threads(
721 void)
722 {
723 int i;
724 struct t_msgid *ptr;
725
726 fprintf(dbgfd, "Dump started.\n\n");
727
728 for (i = 0; i < MSGID_HASH_SIZE; i++) {
729 if (msgids[i] != NULL) {
730 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next) {
731 if (ptr->parent == NULL) {
732 dump_msgid_thread(ptr, 1);
733 fprintf(dbgfd, "\n");
734 }
735 }
736 }
737 }
738 fprintf(dbgfd, "Dump complete.\n\n");
739 }
740 #endif /* DEBUG */
741
742
743 /*
744 * Find the next message in the thread.
745 * We descend children before siblings, and only return articles that
746 * exist in arts[] or NULL if we are truly at the end of a thread.
747 * If there are no more down pointers, backtrack to find a sibling
748 * to continue the thread, we note this with the 'bottom' flag.
749 *
750 * A Message-ID will not be included in a thread if
751 * It doesn't point to an article OR
752 * (it's already threaded/expired OR it has been autokilled)
753 */
754 #define SKIP_ART(ptr) \
755 (ptr && (ptr->article == ART_UNAVAILABLE || \
756 (arts[ptr->article].thread != ART_UNTHREADED || \
757 (tinrc.kill_level == KILL_NOTHREAD && arts[ptr->article].killed))))
758
759 static struct t_msgid *
760 find_next(
761 struct t_msgid *ptr)
762 {
763 static t_bool bottom = FALSE;
764
765 /*
766 * Keep going while we haven't bottomed out and we haven't
767 * got something in arts[]
768 */
769 while (ptr != NULL) {
770
771 /*
772 * Children first, unless bottom is set
773 */
774 if (!bottom && ptr->child != NULL) {
775 ptr = ptr->child;
776
777 /*
778 * If article not present, keep going
779 */
780 if (SKIP_ART(ptr))
781 continue;
782 else
783 break;
784 }
785
786 if (ptr->sibling != NULL) {
787 bottom = FALSE;
788
789 ptr = ptr->sibling;
790
791 /*
792 * If article not present, keep going
793 */
794 if (SKIP_ART(ptr))
795 continue;
796 else
797 break;
798 }
799
800 /*
801 * No more child or sibling to follow, backtrack up to
802 * a sibling if we can find one
803 */
804 if (ptr->child == NULL && ptr->sibling == NULL) {
805 while (ptr != NULL && ptr->sibling == NULL)
806 ptr = ptr->parent;
807
808 /*
809 * We've backtracked up to the parent with a suitable sibling
810 * go round once again to move to this sibling
811 */
812 if (ptr)
813 bottom = TRUE;
814 else
815 break; /* Nothing found, exit with NULL */
816 }
817 }
818
819 return ptr;
820 }
821
822
823 /*
824 * Run the .thread and .prev pointers through the members of this
825 * thread.
826 */
827 static void
828 build_thread(
829 struct t_msgid *ptr)
830 {
831 struct t_msgid *newptr;
832
833 /*
834 * If the root article is gone/expired/killed, find the first valid one
835 */
836 if (SKIP_ART(ptr))
837 ptr = find_next(ptr);
838
839 /*
840 * Keep working through the thread, updating the ptrs as we go
841 */
842 while ((newptr = find_next(ptr)) != NULL) {
843 arts[newptr->article].prev = ptr->article;
844 arts[ptr->article].thread = newptr->article;
845 ptr = newptr;
846 }
847 }
848
849
850 /*
851 * Run a new set of threads through the base articles, using the
852 * parent / child / sibling / article pointers in the msgid hash.
853 */
854 void
855 thread_by_reference(
856 void)
857 {
858 int i;
859 struct t_msgid *ptr;
860
861 #ifdef DEBUG
862 if (debug & DEBUG_REFS) {
863 char file[PATH_LEN];
864
865 joinpath(file, sizeof(file), tmpdir, "REFS.info");
866 if ((dbgfd = fopen(file, "w")) != NULL)
867 dump_msgid_threads();
868 }
869 #endif /* DEBUG */
870
871 /*
872 * Build threads starting from root msgids (ie without parent)
873 */
874 for (i = 0; i < MSGID_HASH_SIZE; i++) {
875 if (msgids[i] != NULL) {
876 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next) {
877 if (ptr->parent == NULL)
878 build_thread(ptr);
879 }
880 }
881 }
882
883 #ifdef DEBUG
884 if ((debug & DEBUG_REFS) && dbgfd != NULL) {
885 DEBUG_PRINT((dbgfd, "Full dump of threading info...\n"));
886 DEBUG_PRINT((dbgfd, "%3s %3s %3s %3s : %3s %3s\n", "#", "Par", "Sib", "Chd", "In", "Thd"));
887
888 for_each_art(i) {
889 DEBUG_PRINT((dbgfd, "%3d %3d %3d %3d : %3d %3d : %.50s %s\n", i,
890 (arts[i].refptr->parent) ? arts[i].refptr->parent->article : -2,
891 (arts[i].refptr->sibling) ? arts[i].refptr->sibling->article : -2,
892 (arts[i].refptr->child) ? arts[i].refptr->child->article : -2,
893 arts[i].prev, arts[i].thread, arts[i].refptr->txt, arts[i].subject));
894 }
895
896 fclose(dbgfd);
897 }
898 #endif /* DEBUG */
899 }
900
901
902 /*
903 * Do the equivalent of subject threading, but only on the thread base
904 * messages.
905 * This should help thread together mistakenly multiply posted articles,
906 * articles which were posted to a group rather than as followups, those
907 * with missing ref headers etc.
908 * We add joined threads onto the end of the .thread chain of the previous
909 * thread. arts[] is already sorted, so the sorting of these will be
910 * correct.
911 */
912 void
913 collate_subjects(
914 void)
915 {
916 int i, j, art;
917 struct t_hashnode *h;
918
919 /*
920 * Run through the root messages of each thread. We have to traverse
921 * using arts[] and not msgids[] to preserve the sorting.
922 */
923 for_each_art(i) {
924 /*
925 * Ignore already threaded and expired arts
926 */
927 if (arts[i].prev >= 0 || IGNORE_ART(i))
928 continue;
929
930 /*
931 * Get the contents of the magic marker in the hashnode
932 */
933 h = (struct t_hashnode *) (arts[i].subject - sizeof(int) - sizeof(void *)); /* FIXME: cast increases required alignment of target type */
934 j = h->aptr;
935
936 if (j != -1 && j < i) {
937 /*
938 * Modified form of the subject threading - the only difference
939 * is that we have to add later threads onto the end of the
940 * previous thread
941 */
942 if (arts[i].subject == arts[j].subject) {
943 /* DEBUG_PRINT((dbgfd, "RES: %d is now previous, at end of %d\n", i, j)); */
944 for (art = j; arts[art].thread >= 0; art = arts[art].thread)
945 ;
946
947 arts[art].thread = i;
948 arts[i].prev = art;
949 }
950 }
951
952 /*
953 * Update the magic marker with the highest numbered mesg in
954 * arts[] that has been used in this thread so far
955 */
956 h->aptr = i;
957 }
958 }
959
960
961 /*
962 * Builds the reference tree:
963 *
964 * 1) Sort the article base. This will ensure that articles and their
965 * siblings are inserted in the correct order.
966 * 2) Add each Message-ID header and its direct reference ('reliable info')
967 * to the cache. Son of RFC 1036 mandates that if References headers must
968 * be trimmed, then at least the (1st three and) last reference should be
969 * maintained.
970 * 3) Add rest of References header to the cache. This information is less
971 * reliable than the info added in 2) and is only used to fill in any
972 * gaps in the reference tree - no information is superseded.
973 * 4) free() up the msgid and refs headers once cached
974 */
975 void
976 build_references(
977 struct t_group *group)
978 {
979 static char msg[LEN];
980 char *s;
981 int i;
982 struct t_article *art;
983 struct t_msgid *refs;
984
985 /*
986 * The articles are currently unsorted, and are as they were put by setup_hard_base()
987 */
988 if (group->attribute->sort_article_type != SORT_ARTICLES_BY_NOTHING)
989 sort_arts(group->attribute->sort_article_type);
990
991 #ifdef DEBUG
992 if (debug & DEBUG_REFS) {
993 char file[PATH_LEN];
994
995 joinpath(file, sizeof(file), tmpdir, "REFS.dump");
996 if ((dbgfd = fopen(file, "w")) != NULL) {
997 # ifdef HAVE_SETVBUF
998 SETVBUF(dbgfd, NULL, _IONBF, 0);
999 # endif /* HAVE_SETVBUF */
1000 DEBUG_PRINT((dbgfd, "MSGID phase\n"));
1001 }
1002 }
1003 #endif /* DEBUG */
1004
1005 /*
1006 * Add the Message-ID headers to the cache, using the last Reference
1007 * as the parent
1008 */
1009 snprintf(msg, sizeof(msg), _("Building References-trees (%d/%d)..."), 1, 2); /* TODO: -> lang.c */
1010 for_each_art(i) {
1011 art = &arts[i];
1012
1013 art->refptr = add_msgid(MSGID_REF, art->msgid, NULL); /* preset art->refptr */
1014 if (art->refs) {
1015 strip_line(art->refs);
1016
1017 /*
1018 * Add the last ref, and then trim it to save wasting time adding
1019 * it again later
1020 * Check for circular references to current article
1021 *
1022 * TODO: do this in a single pass
1023 */
1024 if ((s = strrchr(art->refs, '<')) != NULL) {
1025 char *ptr;
1026
1027 /*
1028 * A comment can occur after another REF_SEP, remove it
1029 *
1030 * TODO: parse it to be sure it is a comment
1031 */
1032 if ((ptr = strpbrk(s, REF_SEP)) != NULL)
1033 *ptr = '\0';
1034
1035 if (valid_msgid(s) && !strcmp(art->msgid, s)) {
1036 /*
1037 * Remove circular reference to current article
1038 */
1039 #ifdef DEBUG
1040 if (debug & DEBUG_REFS)
1041 DEBUG_PRINT((dbgfd, "removing circular reference to: %s\n", s));
1042 #endif /* DEBUG */
1043 *s = '\0';
1044 }
1045 if (valid_msgid(art->msgid) && valid_msgid(s))
1046 art->refptr = add_msgid(MSGID_REF, art->msgid, add_msgid(REF_REF, s, NULL));
1047 *s = '\0';
1048 } else
1049 FreeAndNull(art->refs);
1050 }
1051
1052 /*
1053 * set art->refptr->article - rearrange_siblings() needs this
1054 */
1055 if (art->refptr != NULL)
1056 art->refptr->article = i;
1057
1058 FreeAndNull(art->msgid); /* Now cached - discard this */
1059
1060 if (i % (MODULO_COUNT_NUM * 20) == 0)
1061 show_progress(msg, i, top_art);
1062 }
1063
1064 #ifdef DEBUG
1065 if (debug & DEBUG_REFS)
1066 DEBUG_PRINT((dbgfd, "REFS phase\n"));
1067 #endif /* DEBUG */
1068 /*
1069 * Add the References data to the cache
1070 */
1071 snprintf(msg, sizeof(msg), _("Building References-trees (%d/%d)..."), 2, 2); /* TODO: -> lang.c */
1072 for_each_art(i) {
1073 if (!arts[i].refs) /* No refs - skip */
1074 continue;
1075
1076 art = &arts[i];
1077
1078 /*
1079 * Add the remaining references as parent to the last ref we added
1080 * earlier. skip call to add_msgid when NULL pointer
1081 */
1082
1083 refs = parse_references(art->refs);
1084
1085 if (art->refptr && art->refptr->parent && valid_msgid(art->refptr->parent->txt))
1086 add_msgid(REF_REF, art->refptr->parent->txt, refs);
1087
1088 FreeAndNull(art->refs);
1089
1090 if (i % (MODULO_COUNT_NUM * 20) == 0)
1091 show_progress(msg, i, top_art);
1092 }
1093
1094 #ifdef DEBUG
1095 if ((debug & DEBUG_REFS) && dbgfd != NULL)
1096 fclose(dbgfd);
1097 #endif /* DEBUG */
1098
1099 /*
1100 * all msgids are cached now
1101 * change order of siblings if needed
1102 */
1103 rearrange_siblings();
1104 }