tin  2.6.1
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.6.1.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

refs.c
Go to the documentation of this file.
1/*
2 * Project : tin - a Usenet reader
3 * Module : refs.c
4 * Author : Jason Faultless <jason@altarstone.com>
5 * Created : 1996-05-09
6 * Updated : 2021-02-23
7 * Notes : Caching of message ids / References based threading
8 * Credits : Richard Hodson <richard@macgyver.tele2.co.uk>
9 * hash_msgid, free_msgid
10 *
11 * Copyright (c) 1996-2022 Jason Faultless <jason@altarstone.com>
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright notice,
19 * this list of conditions and the following disclaimer.
20 *
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 *
25 * 3. Neither the name of the copyright holder nor the names of its
26 * contributors may be used to endorse or promote products derived from
27 * this software without specific prior written permission.
28 *
29 * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42
43#ifndef TIN_H
44# include "tin.h"
45#endif /* !TIN_H */
46
47#define MAX_REFS 100 /* Limit recursion depth */
48#define REF_SEP " \t" /* Separator chars in ref headers */
49
50#ifdef DEBUG
51# define DEBUG_PRINT(x) if (dbgfd != NULL) fprintf x
52static FILE *dbgfd;
53#endif /* DEBUG */
54
55/*
56 * local prototypes
57 */
58static char *_get_references(struct t_msgid *refptr, int depth);
59static struct t_msgid *add_msgid(int key, const char *msgid, struct t_msgid *newparent);
60static struct t_msgid *find_next(struct t_msgid *ptr);
61static struct t_msgid *parse_references(char *r);
62static t_bool valid_msgid(char *msgid);
63static unsigned int hash_msgid(const char *key);
64static void add_to_parent(struct t_msgid *ptr);
65static void build_thread(struct t_msgid *ptr);
66static void rearrange_siblings(void);
67#ifdef DEBUG
68 static void dump_msgid_thread(struct t_msgid *ptr, int level);
69 static void dump_msgid_threads(void);
70#endif /* DEBUG */
71#if 0
72 static void dump_msgids(void);
73 static void dump_thread(FILE *fp, struct t_msgid *msgid, int level);
74#endif /* 0 */
75
76/*
77 * The msgids are all hashed into a big array, with overspill
78 */
79static struct t_msgid *msgids[MSGID_HASH_SIZE] = {0};
80
81/*
82 * This part of the code deals with the caching and retrieval
83 * of Message-ID and References headers
84 *
85 * Rationale:
86 * Even though the Message-ID is unique to an article, the References
87 * field contains msgids from elsewhere in the group. As the expiry
88 * period increases, so does the redundancy of data.
89 * At the time of writing, comp.os.ms-windows.advocacy held ~850
90 * articles. The references fields contained 192k of text, of which
91 * 169k was saved using the new caching.
92 *
93 * When threading on Refs, a much better view of the original thread
94 * can be built up using this data, and threading is much faster
95 * because all the article relationships are automatically available
96 * to us.
97 *
98 * NB: We don't cache msgids from the filter file.
99 */
100
101/*
102 * Hash a message id. A msgid is of the form <unique@sitename>
103 * (But badly broken message id's do occur)
104 * We hash on the unique portion which should have good randomness in
105 * the lower 5 bits. Propagate the random bits up with a shift, and
106 * mix in the new bits with an exclusive or.
107 *
108 * This should generate about 5+strlen(string_start) bits of randomness.
109 * MSGID_HASH_SIZE is a prime of order 2^11
110 */
111static unsigned int
113 const char *key)
114{
115 unsigned int hash = 0;
116
117 while (*key && *key != '@') {
118 hash = (hash << 1) ^ (unsigned) *key;
119 ++key;
120 }
121
122 hash %= MSGID_HASH_SIZE;
123
124 return hash;
125}
126
127
128/*
129 * Thread us into our parents' list of children.
130 */
131static void
133 struct t_msgid *ptr)
134{
135 struct t_msgid *p;
136
137 if (!ptr->parent)
138 return;
139
140 /*
141 * Trivial case - if we are the first child (followup)
142 */
143 if (ptr->parent->child == NULL) {
144 ptr->parent->child = ptr;
145 return;
146 }
147
148 /*
149 * Add this followup to the sibling chain of our parent.
150 * We add at the end of the chain, rearrange_siblings() will
151 * sort it later.
152 */
153 for (p = ptr->parent->child; p->sibling != NULL; p = p->sibling)
154 ;
155
156/* ptr->sibling is already NULL */
157 p->sibling = ptr;
158}
159
160
161/*
162 * Checks if Message-ID has valid format
163 * Returns TRUE if it does, FALSE if it does not
164 *
165 * TODO: combine with post.c:damaged_id()
166 */
167static t_bool
169 char *msgid)
170{
171 size_t mlen = 0;
172 t_bool at_present = FALSE;
173
174 str_trim(msgid);
175 if (!msgid || *msgid != '<')
176 return FALSE;
177
178 while (isascii((unsigned char) *msgid) && isgraph((unsigned char) *msgid) && !iscntrl((unsigned char) *msgid) && *msgid != '>') {
179 if (*msgid == '@')
180 at_present = TRUE;
181 mlen++;
182 msgid++;
183 }
184
185 if (!at_present || (*msgid != '>') || mlen <= 2 /* || mlen > 250 */|| *(msgid + 1))
186 return FALSE;
187
188 return TRUE;
189}
190
191
192/*
193 * Adds or updates a message id in the cache.
194 * We return a ptr to the msgid, whether located or newly created.
195 *
196 * . If the message id is new, add it to the cache, creating parent, child
197 * & sibling ptrs if a parent is given.
198 *
199 * . If the message id is a duplicate, then:
200 * a) If no parent or the same parent, is given, no action is needed.
201 *
202 * b) If a parent is specified and the current parent is NULL, then
203 * add in the new parent and create child/sibling ptrs.
204 * Because we add Message-ID headers first, we don't have to worry
205 * about bogus Reference headers messing things up.
206 *
207 * c) If a conflicting parent is given:
208 *
209 * If (key == REF_REF) ignore the error - probably the refs
210 * headers are broken or have been truncated.
211 *
212 * Otherwise we have a genuine problem, two articles in one group
213 * with identical Message-IDs. This is indicative of a broken
214 * overview database.
215 */
216static struct t_msgid *
218 int key,
219 const char *msgid,
220 struct t_msgid *newparent)
221{
222 struct t_msgid *ptr;
223 struct t_msgid *i;
224 unsigned int h;
225
226 if (!msgid) {
227 error_message(2, "add_msgid: NULL msgid\n");
229 giveup();
230 }
231
232 h = hash_msgid(msgid + 1); /* Don't hash the initial '<' */
233
234#ifdef DEBUG
235 if (debug & DEBUG_REFS)
236 DEBUG_PRINT((dbgfd, "---------------- Add %s %s with parent %s\n", (key == MSGID_REF) ? "MSG" : "REF", msgid, (newparent == NULL) ? _("unchanged") : newparent->txt));
237#endif /* DEBUG */
238
239 /*
240 * Look for this message id in the cache.
241 * Broken software will sometimes damage the case of a message-id.
242 */
243 for (i = msgids[h]; i != NULL; i = i->next) {
244 if (strcasecmp(i->txt, msgid) != 0) /* No match yet */
245 continue;
246
247 /*
248 * CASE 1a - No parent specified, do nothing
249 */
250 if (newparent == NULL) {
251#ifdef DEBUG
252 if (debug & DEBUG_REFS)
253 DEBUG_PRINT((dbgfd, "nop: %s No parent specified\n", i->txt));
254#endif /* DEBUG */
255 return i;
256 }
257
258 /*
259 * CASE 1b - Parent not changed, do nothing
260 */
261 if (newparent == i->parent) {
262#ifdef DEBUG
263 if (debug & DEBUG_REFS)
264 DEBUG_PRINT((dbgfd, "dup: %s -> %s (no change)\n", i->txt, i->parent ? i->parent->txt : "NULL"));
265#endif /* DEBUG */
266 return i;
267 }
268
269 /*
270 * CASE2 - A parent has been given where there was none before.
271 * Change parent from null -> not-null & update ptrs
272 */
273 if (i->parent == NULL) {
274 /*
275 * Detect & ignore circular reference paths by looking for the
276 * new parent in this thread
277 */
278 for (ptr = newparent; ptr != NULL; ptr = ptr->parent) {
279 if (ptr == i) {
280#ifdef DEBUG
281 if (debug & DEBUG_REFS)
282 DEBUG_PRINT((dbgfd, "Avoiding circular reference! (%s)\n", (key == MSGID_REF) ? "MSG" : "REF"));
283#endif /* DEBUG */
284 return i;
285 }
286 }
287
288 i->parent = newparent;
289 add_to_parent(i);
290#ifdef DEBUG
291 if (debug & DEBUG_REFS)
292 DEBUG_PRINT((dbgfd, "set: %s -> %s\n", i->txt, newparent ? newparent->txt : _("None")));
293#endif /* DEBUG */
294 return i;
295 }
296
297 /*
298 * CASE 3 - A new parent has been given that conflicts with the
299 * current one. This is caused by
300 * 1) A duplicate Message-ID in the spool (very bad !)
301 * 2) corrupt References header
302 * All we can do is ignore the error
303 */
304 if (i->parent != newparent) {
305#ifdef DEBUG
306 if (debug & DEBUG_REFS)
307 DEBUG_PRINT((dbgfd, "Warning: (%s) Ignoring %s -> %s (already %s)\n",
308 (key == MSGID_REF) ? "MSG" : "REF", i->txt,
309 newparent ? newparent->txt : "None", i->parent->txt));
310#endif /* DEBUG */
311
312 return i;
313 }
314
315 error_message(2, "Error: Impossible combination of conditions!\n");
316 return i;
317 }
318
319#ifdef DEBUG
320 if (debug & DEBUG_REFS)
321 DEBUG_PRINT((dbgfd, "new: %s -> %s\n", msgid, (newparent) ? newparent->txt : "None"));
322#endif /* DEBUG */
323
324 /*
325 * This is a new node, so build a structure for it
326 */
327 ptr = my_malloc(sizeof(struct t_msgid) + strlen(msgid));
328
329 strcpy(ptr->txt, msgid);
330 ptr->parent = newparent;
331 ptr->child = ptr->sibling = NULL;
332 ptr->article = (key == MSGID_REF ? top_art : ART_UNAVAILABLE);
333
334 add_to_parent(ptr);
335
336 /*
337 * Insert at head of list for speed.
338 */
339 ptr->next = msgids[h];
340 msgids[h] = ptr;
341
342 return ptr;
343}
344
345
346/*
347 * Find a Message-ID in the cache. Return ptr to this node, or NULL if
348 * not found.
349 */
350struct t_msgid *
352 const char *msgid)
353{
354 unsigned int h;
355 struct t_msgid *i;
356
357 h = hash_msgid(msgid + 1); /* Don't hash the initial '<' */
358
359 /*
360 * Look for this message id in the cache.
361 * Broken software will sometimes damage the case of a message-id.
362 */
363 for (i = msgids[h]; i != NULL; i = i->next) {
364 if (strcasecmp(i->txt, msgid) == 0) /* Found it */
365 return i;
366 }
367
368 return NULL;
369}
370
371
372/*
373 * Take a raw line of references data and return a ptr to a linked list of
374 * msgids, starting with the most recent entry. (Thus the list is reversed)
375 * Following the parent ptrs leads us back to the start of the thread.
376 *
377 * We iterate through the refs, adding each to the msgid cache, with
378 * the previous ref as the parent.
379 * The space saving vs. storing the refs as a single string is significant.
380 */
381static struct t_msgid *
383 char *r)
384{
385 char *ptr;
386 struct t_msgid *parent, *current;
387
388 if (!r)
389 return NULL;
390
391#ifdef DEBUG
392 if (debug & DEBUG_REFS)
393 DEBUG_PRINT((dbgfd, "parse_references: %s\n", r));
394#endif /* DEBUG */
395
396 /*
397 * Break the refs down, using REF_SEP as delimiters
398 */
399 if ((ptr = strtok(r, REF_SEP)) == NULL)
400 return NULL;
401
402 /*
403 * As per RFC 5536, a leading comment is allowed -> skip unknown
404 * token until we find a valid msgid
405 *
406 * TODO: parse these tokens to be sure it is a comment and not
407 * a damaged header
408 */
409 if (!valid_msgid(ptr)) {
410 while ((ptr = strtok(NULL, REF_SEP)) != NULL && !valid_msgid(ptr))
411 ;
412 }
413
414 if (ptr == NULL)
415 return NULL;
416
417 /*
418 * By definition, the head of the thread has no parent
419 */
420 parent = NULL;
421
422 current = add_msgid(REF_REF, ptr, parent);
423
424 while ((ptr = strtok(NULL, REF_SEP)) != NULL) {
425 if (valid_msgid(ptr)) {
426 parent = current;
427 current = add_msgid(REF_REF, ptr, parent);
428 }
429 }
430
431 return current;
432}
433
434
435static void
437 void)
438{
439 int i;
440 struct t_msgid *current, *p1, *p2;
441
442 for_each_art(i) {
443 current = arts[i].refptr;
444
445 if (!current)
446 continue;
447
448 for (; current->sibling == NULL && current->parent != NULL; current = current->parent)
449 ;
450
451 if (current->sibling != NULL) {
452 for (p1 = current; p1->article == ART_UNAVAILABLE && p1->child != NULL; p1 = p1->child)
453 ;
454
455 for (p2 = current->sibling; p2->article == ART_UNAVAILABLE && p2->child != NULL; p2 = p2->child)
456 ;
457
458 if (p1->article != ART_UNAVAILABLE && p2->article != ART_UNAVAILABLE && p1->article > p2->article) {
459 if (current->parent->child == current) {
460 /*
461 * current is the first followup
462 * adjust parent->child pointer
463 */
464 current->parent->child = current->sibling;
465 p1 = current->parent->child;
466 } else {
467 /*
468 * current is not the first followup
469 * find the sibling above current
470 * adjust the sibling pointer there
471 */
472 for (p1 = current->parent->child; p1->sibling != current; p1 = p1->sibling)
473 ;
474
475 p1->sibling = current->sibling;
476 p1 = p1->sibling;
477 }
478 /*
479 * swap current <-> sibling
480 */
481 current->sibling = p1->sibling;
482 p1->sibling = current;
483 }
484 }
485 }
486}
487
488
489/*
490 * Reconstruct the References: field from the parent pointers
491 * NB: In deep threads this can lead to a very long line. If you want to use
492 * this function to build a Reference: line for posting be aware that the
493 * server might refuse long lines -- short it accordingly!
494 */
495static char *
497 struct t_msgid *refptr,
498 int depth)
499{
500 char *refs;
501 static size_t len; /* Accumulated size */
502 static size_t pos; /* current insertion position */
503
504 if (depth == 1)
505 len = 0;
506
507 len += strlen(refptr->txt) + 1; /* msgid + space */
508 if (refptr->parent == NULL || depth > MAX_REFS) {
509
510#ifdef DEBUG
511 if (debug & DEBUG_REFS) {
512 if (depth > MAX_REFS)
513 error_message(2, "Warning: Too many refs near to %s. Truncated\n", refptr->txt);
514 }
515#endif /* DEBUG */
516 refs = my_malloc(len + 1); /* total length + nullbyte */
517 pos = 0;
518 } else
519 refs = _get_references(refptr->parent, depth + 1);
520
521 sprintf(refs + pos, "%s ", refptr->txt);
522 pos = strlen(refs);
523
524 return refs;
525}
526
527
528/*
529 * A wrapper to the above, null terminate the string
530 */
531char *
533 struct t_msgid *refptr)
534{
535 char *refs;
536 size_t len;
537
538 if (refptr == NULL)
539 return NULL;
540
541 refs = _get_references(refptr, 1);
542 len = strlen(refs);
543 refs[len - 1] = '\0';
544
545 return refs;
546}
547
548
549/*
550 * Clear the entire msgid cache, freeing up all chains. This is
551 * normally only needed when entering a new group
552 */
553void
555 void)
556{
557 int i;
558 struct t_msgid *ptr, *next, **msgptr;
559
560 msgptr = msgids; /* first list */
561
562 for (i = MSGID_HASH_SIZE - 1; i >= 0; i--) { /* count down is faster */
563 ptr = *msgptr;
564 *msgptr++ = NULL; /* declare list empty */
565
566 while (ptr != NULL) { /* for each node in the list */
567
568 next = ptr->next; /* grab ptr before we free node */
569 free(ptr); /* release node */
570
571 ptr = next; /* hop down the chain */
572 }
573 }
574}
575
576
577#if 0 /* But please don't remove it */
578/*
579 * Function to dump an ASCII tree map of a thread rooted at msgid.
580 * Output goes to fp, level is the current depth of the tree.
581 */
582static void
583dump_thread(
584 FILE *fp,
585 struct t_msgid *msgid,
586 int level)
587{
588 char buff[120]; /* This is _probably_ enough */
589 char *ptr = buff;
590 int i, len;
591
592 /*
593 * Dump the current article
594 */
595 sprintf(ptr, "%3d %*s", msgid->article, 2 * level, " ");
596
597 len = strlen(ptr);
598 i = cCOLS - len - 20;
599
600 if (msgid->article >= 0)
601 sprintf(ptr + len, "%-*.*s %-17.17s", i, i, arts[msgid->article].subject, (arts[msgid->article].name) ? arts[msgid->article].name : arts[msgid->article].from);
602 else
603 sprintf(ptr + len, "%-*.*s", i, i, _("[- Unavailable -]"));
604
605 fprintf(fp, "%s\n", ptr);
606
607 if (msgid->child != NULL)
608 dump_thread(fp, msgid->child, level + 1);
609
610 if (msgid->sibling != NULL)
611 dump_thread(fp, msgid->sibling, level);
612
613 return;
614}
615
616
617static void
618dump_msgids(
619 void)
620{
621 int i;
622 struct t_msgid *ptr;
623
624 my_fprintf(stderr, "Dumping...\n");
625
626 for (i = 0; i < MSGID_HASH_SIZE; i++) {
627 if (msgids[i] != NULL) {
628 my_fprintf(stderr, "node %d", i);
629 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next)
630 my_fprintf(stderr, " -> %s", ptr->txt);
631
632 my_fprintf(stderr, "\n");
633
634 }
635 }
636}
637#endif /* 0 */
638
639
640/*
641 * The rest of this code deals with reference threading
642 *
643 * Legend:
644 *
645 * . When a new thread is started, the root message will have no
646 * References: field
647 *
648 * . When a followup is posted, the message-id that was referred to
649 * will be appended to the References: field. If no References:
650 * field exists, a new one will be created, containing the single
651 * message-id
652 *
653 * . The References: field should not be truncated, though in practice
654 * this will happen, often in badly broken ways.
655 *
656 * This is simplistic, so check out RFC 1036 & son of RFC 1036 for full
657 * details from the posting point of view.
658 *
659 * We attempt to maintain 3 pointers in each message-id to handle threading
660 * on References:
661 *
662 * 1) parent - the article that the current one was in reply to
663 * An article with no References: has no parent, therefore
664 * it is the root of a thread.
665 *
666 * 2) sibling - the next reply in sequence to parent.
667 *
668 * 3) child - the first reply to the current article.
669 *
670 * These pointers are automatically set up when we read in the
671 * headers for a group.
672 *
673 * It remains for us to fill in the .thread and .prev ptrs in
674 * each article that exists in the spool, using the intelligence of
675 * the reference tree to locate the 'next' article in the thread.
676 *
677 */
678/*
679 * Clear out all the article fields from the msgid hash prior to a
680 * rethread.
681 */
682void
684 void)
685{
686 int i;
687 struct t_msgid *ptr;
688
689 for (i = MSGID_HASH_SIZE - 1; i >= 0; i--) {
690 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next)
692 }
693}
694
695
696#ifdef DEBUG
697/*
698 * Dump out all the threads from the msgid point of view, show the
699 * related article index in arts[] where possible
700 * A thread is defined as a starting article with no parent
701 */
702static void
703dump_msgid_thread(
704 struct t_msgid *ptr,
705 int level)
706{
707 fprintf(dbgfd, "%*s %s (%d)\n", level * 3, " ", ptr->txt, ptr->article);
708
709 if (ptr->child != NULL)
710 dump_msgid_thread(ptr->child, level + 1);
711
712 if (ptr->sibling != NULL)
713 dump_msgid_thread(ptr->sibling, level);
714
715 return;
716}
717
718
719static void
720dump_msgid_threads(
721 void)
722{
723 int i;
724 struct t_msgid *ptr;
725
726 fprintf(dbgfd, "Dump started.\n\n");
727
728 for (i = 0; i < MSGID_HASH_SIZE; i++) {
729 if (msgids[i] != NULL) {
730 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next) {
731 if (ptr->parent == NULL) {
732 dump_msgid_thread(ptr, 1);
733 fprintf(dbgfd, "\n");
734 }
735 }
736 }
737 }
738 fprintf(dbgfd, "Dump complete.\n\n");
739}
740#endif /* DEBUG */
741
742
743/*
744 * Find the next message in the thread.
745 * We descend children before siblings, and only return articles that
746 * exist in arts[] or NULL if we are truly at the end of a thread.
747 * If there are no more down pointers, backtrack to find a sibling
748 * to continue the thread, we note this with the 'bottom' flag.
749 *
750 * A Message-ID will not be included in a thread if
751 * It doesn't point to an article OR
752 * (it's already threaded/expired OR it has been autokilled)
753 */
754#define SKIP_ART(ptr) \
755 (ptr && (ptr->article == ART_UNAVAILABLE || \
756 (arts[ptr->article].thread != ART_UNTHREADED || \
757 (tinrc.kill_level == KILL_NOTHREAD && arts[ptr->article].killed))))
758
759static struct t_msgid *
761 struct t_msgid *ptr)
762{
763 static t_bool bottom = FALSE;
764
765 /*
766 * Keep going while we haven't bottomed out and we haven't
767 * got something in arts[]
768 */
769 while (ptr != NULL) {
770
771 /*
772 * Children first, unless bottom is set
773 */
774 if (!bottom && ptr->child != NULL) {
775 ptr = ptr->child;
776
777 /*
778 * If article not present, keep going
779 */
780 if (SKIP_ART(ptr))
781 continue;
782 else
783 break;
784 }
785
786 if (ptr->sibling != NULL) {
787 bottom = FALSE;
788
789 ptr = ptr->sibling;
790
791 /*
792 * If article not present, keep going
793 */
794 if (SKIP_ART(ptr))
795 continue;
796 else
797 break;
798 }
799
800 /*
801 * No more child or sibling to follow, backtrack up to
802 * a sibling if we can find one
803 */
804 if (ptr->child == NULL && ptr->sibling == NULL) {
805 while (ptr != NULL && ptr->sibling == NULL)
806 ptr = ptr->parent;
807
808 /*
809 * We've backtracked up to the parent with a suitable sibling
810 * go round once again to move to this sibling
811 */
812 if (ptr)
813 bottom = TRUE;
814 else
815 break; /* Nothing found, exit with NULL */
816 }
817 }
818
819 return ptr;
820}
821
822
823/*
824 * Run the .thread and .prev pointers through the members of this
825 * thread.
826 */
827static void
829 struct t_msgid *ptr)
830{
831 struct t_msgid *newptr;
832
833 /*
834 * If the root article is gone/expired/killed, find the first valid one
835 */
836 if (SKIP_ART(ptr))
837 ptr = find_next(ptr);
838
839 /*
840 * Keep working through the thread, updating the ptrs as we go
841 */
842 while ((newptr = find_next(ptr)) != NULL) {
843 arts[newptr->article].prev = ptr->article;
844 arts[ptr->article].thread = newptr->article;
845 ptr = newptr;
846 }
847}
848
849
850/*
851 * Run a new set of threads through the base articles, using the
852 * parent / child / sibling / article pointers in the msgid hash.
853 */
854void
856 void)
857{
858 int i;
859 struct t_msgid *ptr;
860
861#ifdef DEBUG
862 if (debug & DEBUG_REFS) {
863 char file[PATH_LEN];
864
865 joinpath(file, sizeof(file), TMPDIR, "REFS.info");
866 if ((dbgfd = fopen(file, "w")) != NULL)
867 dump_msgid_threads();
868 }
869#endif /* DEBUG */
870
871 /*
872 * Build threads starting from root msgids (ie without parent)
873 */
874 for (i = 0; i < MSGID_HASH_SIZE; i++) {
875 if (msgids[i] != NULL) {
876 for (ptr = msgids[i]; ptr != NULL; ptr = ptr->next) {
877 if (ptr->parent == NULL)
878 build_thread(ptr);
879 }
880 }
881 }
882
883#ifdef DEBUG
884 if ((debug & DEBUG_REFS) && dbgfd != NULL) {
885 DEBUG_PRINT((dbgfd, "Full dump of threading info...\n"));
886 DEBUG_PRINT((dbgfd, "%3s %3s %3s %3s : %3s %3s\n", "#", "Par", "Sib", "Chd", "In", "Thd"));
887
888 for_each_art(i) {
889 DEBUG_PRINT((dbgfd, "%3d %3d %3d %3d : %3d %3d : %.50s %s\n", i,
890 (arts[i].refptr->parent) ? arts[i].refptr->parent->article : -2,
891 (arts[i].refptr->sibling) ? arts[i].refptr->sibling->article : -2,
892 (arts[i].refptr->child) ? arts[i].refptr->child->article : -2,
893 arts[i].prev, arts[i].thread, arts[i].refptr->txt, arts[i].subject));
894 }
895
896 fclose(dbgfd);
897 }
898#endif /* DEBUG */
899}
900
901
902/*
903 * Do the equivalent of subject threading, but only on the thread base
904 * messages.
905 * This should help thread together mistakenly multiply posted articles,
906 * articles which were posted to a group rather than as followups, those
907 * with missing ref headers etc.
908 * We add joined threads onto the end of the .thread chain of the previous
909 * thread. arts[] is already sorted, so the sorting of these will be
910 * correct.
911 */
912void
914 void)
915{
916 int i, j, art;
917 struct t_hashnode *h;
918
919 /*
920 * Run through the root messages of each thread. We have to traverse
921 * using arts[] and not msgids[] to preserve the sorting.
922 */
923 for_each_art(i) {
924 /*
925 * Ignore already threaded and expired arts
926 */
927 if (arts[i].prev >= 0 || IGNORE_ART(i))
928 continue;
929
930 /*
931 * Get the contents of the magic marker in the hashnode
932 */
933 h = (struct t_hashnode *) (arts[i].subject - sizeof(int) - sizeof(void *)); /* FIXME: cast increases required alignment of target type */
934 j = h->aptr;
935
936 if (j != -1 && j < i) {
937 /*
938 * Modified form of the subject threading - the only difference
939 * is that we have to add later threads onto the end of the
940 * previous thread
941 */
942 if (arts[i].subject == arts[j].subject) {
943 /* DEBUG_PRINT((dbgfd, "RES: %d is now previous, at end of %d\n", i, j)); */
944 for (art = j; arts[art].thread >= 0; art = arts[art].thread)
945 ;
946
947 arts[art].thread = i;
948 arts[i].prev = art;
949 }
950 }
951
952 /*
953 * Update the magic marker with the highest numbered mesg in
954 * arts[] that has been used in this thread so far
955 */
956 h->aptr = i;
957 }
958}
959
960
961/*
962 * Builds the reference tree:
963 *
964 * 1) Sort the article base. This will ensure that articles and their
965 * siblings are inserted in the correct order.
966 * 2) Add each Message-ID header and its direct reference ('reliable info')
967 * to the cache. Son of RFC 1036 mandates that if References headers must
968 * be trimmed, then at least the (1st three and) last reference should be
969 * maintained.
970 * 3) Add rest of References header to the cache. This information is less
971 * reliable than the info added in 2) and is only used to fill in any
972 * gaps in the reference tree - no information is superseded.
973 * 4) free() up the msgid and refs headers once cached
974 */
975void
977 struct t_group *group)
978{
979 static char msg[LEN];
980 char *s;
981 int i;
982 struct t_article *art;
983 struct t_msgid *refs;
984
985 /*
986 * The articles are currently unsorted, and are as they were put by setup_hard_base()
987 */
990
991#ifdef DEBUG
992 if (debug & DEBUG_REFS) {
993 char file[PATH_LEN];
994
995 joinpath(file, sizeof(file), TMPDIR, "REFS.dump");
996 if ((dbgfd = fopen(file, "w")) != NULL) {
997# ifdef HAVE_SETVBUF
998 SETVBUF(dbgfd, NULL, _IONBF, 0);
999# endif /* HAVE_SETVBUF */
1000 DEBUG_PRINT((dbgfd, "MSGID phase\n"));
1001 }
1002 }
1003#endif /* DEBUG */
1004
1005 /*
1006 * Add the Message-ID headers to the cache, using the last Reference
1007 * as the parent
1008 */
1009 snprintf(msg, sizeof(msg), _("Building References-trees (%d/%d)..."), 1, 2); /* TODO: -> lang.c */
1010 for_each_art(i) {
1011 art = &arts[i];
1012
1013 art->refptr = add_msgid(MSGID_REF, art->msgid, NULL); /* preset art->refptr */
1014 if (art->refs) {
1015 strip_line(art->refs);
1016
1017 /*
1018 * Add the last ref, and then trim it to save wasting time adding
1019 * it again later
1020 * Check for circular references to current article
1021 *
1022 * TODO: do this in a single pass
1023 */
1024 if ((s = strrchr(art->refs, '<')) != NULL) {
1025 char *ptr;
1026
1027 /*
1028 * A comment can occur after another REF_SEP, remove it
1029 *
1030 * TODO: parse it to be sure it is a comment
1031 */
1032 if ((ptr = strpbrk(s, REF_SEP)) != NULL)
1033 *ptr = '\0';
1034
1035 if (valid_msgid(s) && !strcmp(art->msgid, s)) {
1036 /*
1037 * Remove circular reference to current article
1038 */
1039#ifdef DEBUG
1040 if (debug & DEBUG_REFS)
1041 DEBUG_PRINT((dbgfd, "removing circular reference to: %s\n", s));
1042#endif /* DEBUG */
1043 *s = '\0';
1044 }
1045 if (valid_msgid(art->msgid) && valid_msgid(s))
1046 art->refptr = add_msgid(MSGID_REF, art->msgid, add_msgid(REF_REF, s, NULL));
1047 *s = '\0';
1048 } else
1049 FreeAndNull(art->refs);
1050 }
1051
1052 /*
1053 * set art->refptr->article - rearrange_siblings() needs this
1054 */
1055 if (art->refptr != NULL)
1056 art->refptr->article = i;
1057
1058 FreeAndNull(art->msgid); /* Now cached - discard this */
1059
1060 if (i % (MODULO_COUNT_NUM * 20) == 0)
1061 show_progress(msg, i, top_art);
1062 }
1063
1064#ifdef DEBUG
1065 if (debug & DEBUG_REFS)
1066 DEBUG_PRINT((dbgfd, "REFS phase\n"));
1067#endif /* DEBUG */
1068 /*
1069 * Add the References data to the cache
1070 */
1071 snprintf(msg, sizeof(msg), _("Building References-trees (%d/%d)..."), 2, 2); /* TODO: -> lang.c */
1072 for_each_art(i) {
1073 if (!arts[i].refs) /* No refs - skip */
1074 continue;
1075
1076 art = &arts[i];
1077
1078 /*
1079 * Add the remaining references as parent to the last ref we added
1080 * earlier. skip call to add_msgid when NULL pointer
1081 */
1082
1083 refs = parse_references(art->refs);
1084
1085 if (art->refptr && art->refptr->parent && valid_msgid(art->refptr->parent->txt))
1086 add_msgid(REF_REF, art->refptr->parent->txt, refs);
1087
1088 FreeAndNull(art->refs);
1089
1090 if (i % (MODULO_COUNT_NUM * 20) == 0)
1091 show_progress(msg, i, top_art);
1092 }
1093
1094#ifdef DEBUG
1095 if ((debug & DEBUG_REFS) && dbgfd != NULL)
1096 fclose(dbgfd);
1097#endif /* DEBUG */
1098
1099 /*
1100 * all msgids are cached now
1101 * change order of siblings if needed
1102 */
1104}
unsigned t_bool
Definition: bool.h:77
#define TRUE
Definition: bool.h:74
#define FALSE
Definition: bool.h:70
static t_openartinfo * art
Definition: cook.c:78
#define DEBUG_REFS
Definition: debug.h:51
struct t_article * arts
Definition: memory.c:69
int cCOLS
Definition: curses.c:53
int top_art
Definition: art.c:60
unsigned short debug
Definition: debug.c:51
char * strpbrk(const char *str1, const char *str2)
Definition: string.c:312
char * str_trim(char *string)
Definition: string.c:539
void error_message(unsigned int sdelay, const char *fmt,...)
Definition: screen.c:224
char * strip_line(char *line)
Definition: misc.c:3644
void free_all_arrays(void)
Definition: memory.c:237
void joinpath(char *result, size_t result_size, const char *dir, const char *file)
Definition: joinpath.c:50
int strcasecmp(const char *p, const char *q)
Definition: string.c:475
void sort_arts(unsigned sort_art_type)
_Noreturn void giveup(void)
Definition: main.c:1058
void show_progress(const char *txt, t_artnum count, t_artnum total)
Definition: screen.c:529
static void add_to_parent(struct t_msgid *ptr)
Definition: refs.c:132
static t_bool valid_msgid(char *msgid)
Definition: refs.c:168
static struct t_msgid * add_msgid(int key, const char *msgid, struct t_msgid *newparent)
Definition: refs.c:217
struct t_msgid * find_msgid(const char *msgid)
Definition: refs.c:351
static void rearrange_siblings(void)
Definition: refs.c:436
#define SKIP_ART(ptr)
Definition: refs.c:754
static unsigned int hash_msgid(const char *key)
Definition: refs.c:112
char * get_references(struct t_msgid *refptr)
Definition: refs.c:532
static struct t_msgid * find_next(struct t_msgid *ptr)
Definition: refs.c:760
static struct t_msgid * parse_references(char *r)
Definition: refs.c:382
#define REF_SEP
Definition: refs.c:48
void build_references(struct t_group *group)
Definition: refs.c:976
void collate_subjects(void)
Definition: refs.c:913
static void build_thread(struct t_msgid *ptr)
Definition: refs.c:828
#define MAX_REFS
Definition: refs.c:47
void free_msgids(void)
Definition: refs.c:554
static char * _get_references(struct t_msgid *refptr, int depth)
Definition: refs.c:496
static struct t_msgid * msgids[222199]
Definition: refs.c:79
void thread_by_reference(void)
Definition: refs.c:855
void clear_art_ptrs(void)
Definition: refs.c:683
Definition: tin.h:1533
char * subject
Definition: tin.h:1535
int prev
Definition: tin.h:1549
int thread
Definition: tin.h:1548
struct t_msgid * refptr
Definition: tin.h:1543
char * from
Definition: tin.h:1536
char * name
Definition: tin.h:1537
unsigned sort_article_type
Definition: tin.h:1694
Definition: tin.h:1816
struct t_attribute * attribute
Definition: tin.h:1834
int aptr
Definition: tin.h:1843
Definition: tin.h:1509
struct t_msgid * next
Definition: tin.h:1510
struct t_msgid * sibling
Definition: tin.h:1512
struct t_msgid * child
Definition: tin.h:1513
struct t_msgid * parent
Definition: tin.h:1511
int article
Definition: tin.h:1514
char txt[1]
Definition: tin.h:1515
#define my_fprintf
Definition: tcurses.h:176
#define LEN
Definition: tin.h:860
#define TMPDIR
Definition: tin.h:2170
#define MODULO_COUNT_NUM
Definition: tin.h:868
#define for_each_art(x)
Definition: tin.h:2260
#define SETVBUF(stream, buf, mode, size)
Definition: tin.h:2392
#define IGNORE_ART(i)
Definition: tin.h:1029
#define my_malloc(size)
Definition: tin.h:2245
#define isascii(c)
Definition: tin.h:411
#define _(Text)
Definition: tin.h:94
#define PATH_LEN
Definition: tin.h:843
#define MSGID_REF
Definition: tin.h:1481
#define REF_REF
Definition: tin.h:1480
#define snprintf
Definition: tin.h:2464
#define SORT_ARTICLES_BY_NOTHING
Definition: tin.h:1185
#define FreeAndNull(p)
Definition: tin.h:2253
#define MSGID_HASH_SIZE
Definition: tin.h:1492
#define ART_UNAVAILABLE
Definition: tin.h:1348