2

so I've been writing an mtf encoder in C and I've been running into a realloc() error regardless of what I do. I've checked to see if there was an error in my logic (and there may be) by using print statements to see if I'm overstepping the bounds of my currently malloc'd array (adding a string past my original array size) and that doesn't seem to be the issue. I've used GDB and Valgrind and GDB gives me a cryptic message while Valgrind runs into a segmentation fault. This is my first time using dynamic memory and I'm pretty confused as to what the problem is, below are my code along with the GDB error:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int count = 0;


move_to_front(int index, char** words){
    int i;
    char *t = words[index];

    for(i = index; i>1; i--){
       words[i] = words[i-1];
    }

    words[1] = t;

}


char** reallocate_words(char** words, int* words_size_pointer){
   printf("We're entering here\n");
   printf("%d", *words_size_pointer);
   int temp = *words_size_pointer;
   char** tempor;
   temp = temp*2;
   printf("%d", temp);
   tempor = (char**) realloc(words, temp);
   int i = *words_size_pointer;
   for(i; i<temp; i++){
       tempor[i] = (char*) malloc(120);
    }
   words_size_pointer = &temp;
   return tempor;

 }

void encode_word(int* words_size_pointer, FILE *f, char* word, char** words){
    if(count == 0){
        words[1] = word;
        fputs(words[1], f);
        count++;
    }
    int i;
    for(i=0; i<=count; i++){
        if(strcmp(words[i], word) == 0){
            break;
        }
    }
    if(i>=(*words_size_pointer)){
        printf("%d\n", i);
        words = reallocate_words(words, words_size_pointer);
        words[count+1] = word;
        count++;
        fputc(count+128, f);
        fputs(words[count], f);
        move_to_front(count, words);
    }
    if(i>count){
        words[count+1] = word;
        count++;
        fputc(count+128, f);
        fputs(words[count], f);
        move_to_front(count, words);
    }
    else{
        fputc(i+128, f);
        move_to_front(i, words);
    }



}

void sep_words(char** words, char *line, int* words_size_pointer, FILE *f){
    char* x;
    int i = 0;
    x = strtok(line, " ");
    while(x != NULL){
        encode_word(words_size_pointer,f, x, words);
        x = strtok(NULL, " ");
    }   
}


void readline(FILE *f_two, FILE *f, char** words, int* words_size_pointer){
    char *line;
    size_t len = 0;
    ssize_t temp;
    int count;
    do{
    temp = getline(&line,&len,f);
    printf("%s", line);
    if(temp!= -1){
        sep_words(words, line, words_size_pointer, f_two);

    }
    }while(temp!=-1);

}


int main(int argc, char *argv[]){
    int x;
    int i;
    int j;
    x = strlen(argv[1]);
    char fi[x];
    char mtf[3] = "mtf";
    FILE *f;
    FILE *f_two;
    for(j = 0; j<(x-3); j++){
       fi[j] = argv[1][j];
    }
    strcat(fi, mtf);
    f = fopen(argv[1], "r");
    f_two = fopen(fi, "w");
    fputc(0xFA, f_two);
    fputc(0XCE, f_two);
    fputc(0XFA, f_two);
    fputc(0XDF, f_two);
    if(f == NULL){
        return 1;
    }

    char** words;
    words = (char **) malloc(20);
    for(i = 0; i<20; i++){
        words[i] = (char*) malloc(120);
    }
    int words_size = 20;
    int* words_size_pointer = &words_size;

    readline(f_two, f, words, words_size_pointer);
    return 0;
}

And as for the GDB error:

    *** Error in `/file_loc/mtfcoding2': realloc(): invalid next size: 0x0000000000603490 ***
2040 \\This is due to print statements within my function.
Program received signal SIGABRT, Aborted.
0x00007ffff7a4acc9 in __GI_raise (sig=sig@entry=6)
    at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
56  ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.

Thank you for your time! :)

  • `printf()` debugging is not foolproof. The compiler may reorder code, and output is usually buffered, so you may not be able to pinpoint the position of the error that way. Also, memory corruption is usually detected significantly after its cause. – EOF Mar 27 '15 at 23:59
  • What would you recommend I do instead? – Zack Sullivan Mar 28 '15 at 00:03
  • 1
    If on Linux, use [valgrind](http://valgrind.org/) – Basile Starynkevitch Mar 28 '15 at 00:10
  • I did, it runs into a segmentation fault when it hits the reallocate function. – Zack Sullivan Mar 28 '15 at 00:11
  • Honestly, you should start by compiling with full warnings. gcc also needs to be set to optimize, or it won't produce good warnings: `gcc -Wall -Wextra -Wpedantic -O3` should give good warnings. – EOF Mar 28 '15 at 00:11
  • `words = (char**) malloc(20);` -- probably not the issue, but did you really mean to only allocate 5 strings if you're on a machine with 32-bit pointers and 2.5 (2) strings if you're on a machine with 64-bit pointers? –  Mar 28 '15 at 00:59

3 Answers3

5

malloc and realloc require the number of bytes as argument. However you are writing code like:

char** words;
words = (char **) malloc(20);
for(i = 0; i<20; i++){
    words[i] = (char*) malloc(120);

You allocate 20 bytes but then you write 20 pointers (which probably takes 80 bytes). To fix this you need to compute how many bytes are required to store the 20 pointers. A safe way of doing this is to use malloc as recommended by SO:

words = malloc(20 * sizeof *words);

You have the same problem in your realloc call.


This line has no effect: words_size_pointer = &temp; . Perhaps you meant *words_size_pointer = temp; . Make sure you clearly understand the difference between those two lines.

NB. There may be other errors.

Community
  • 1
  • 1
M.M
  • 138,810
  • 21
  • 208
  • 365
1

Well, for starters, your move_to_front is dropping pointers. This one is a pretty bad memory leak, and given the nature of C and memory leaks, could be the cause of your segfault (for now). You should be doing this

for(i = index; i > 1; i--){
    char* tmp = words[i];
    words[i] = words[i-1];
    words[i-1] = tmp;
}

Otherwise, what you have done is overwritten the pointers from index to words[2] with the pointer at index. Also, you seem to like to start your words array at 1 instead of 0. Those off-by-one errors are gonna hurt ya too.

Also (as stated in my earlier comment), words_size_pointer = &temp; isn't quite right. Do this instead *words_size_pointer = temp;. The first way is only a local pointer re-assignment, but you want the change to be reflected in the caller's scope, so you must dereference the pointer and modify it.

Tommy
  • 580
  • 4
  • 7
  • Thank you for your answer! My original move_to_front function was written as such. But my professor posted a sample solution and I basically copied that. And I guess with me being lazy I forgot to edit the words array index from 0 to 1 (he starts everything at position 1). Unfortunately though none of these help with my reallocate(): invalid next size error :( – Zack Sullivan Mar 28 '15 at 00:14
  • 2
    you know what, it might be because of the `words_size_pointer = &temp;` Do this instead `*words_size_pointer = temp;` – Tommy Mar 28 '15 at 00:21
  • reason being, your way sets the pointer locally. You want the update to be reflected in the caller's scope so you need to deref the variable and set it that way – Tommy Mar 28 '15 at 00:22
  • @ZackSullivan Unrelated, "...he starts everything at position 1" I hope this isn't a formal course on the C language. If it is, he's not doing you any favors with that mantra. Sooner or later, you have to embrace the 0..(n-1) ideology of the language, the sooner the better. – WhozCraig Mar 28 '15 at 00:25
  • @WhozCrag No it isn't. And he has always started at 0 beforehand. The way he has his code written differs from mine and is meant to be used as a starting point for students struggling to write their own encoders from scratch. I personally always start at 0. And Tommy thanks for the update. But unfortunately that doesn't seem to work either. I'm starting to wonder whether or not to just write my encoder from scratch again. – Zack Sullivan Mar 28 '15 at 00:29
  • @ZackSullivan Another problem is the fact that you simply aren't checking your `malloc` and (even worse) `realloc` return values. You use `tempor = realloc(...);` and immediately attempt to use `tempor` when it might be `NULL`. –  Mar 28 '15 at 00:35
  • @ChronoKitsune: That wouldn't protect from memory corruption. A `NULL` dereference as a consequence of missing `realloc()` checking only leads to SIGSEGV, not SIGABRT. – EOF Mar 28 '15 at 00:37
  • 1
    @EOF it leads to undefined behaviour – M.M Mar 28 '15 at 01:21
  • @EOF You're correct of course. However, it's always a good idea to check for allocation failures, so I assumed it was worth mentioning, even if it wasn't the immediate source of this particular problem. –  Mar 28 '15 at 02:02
  • @MattMcNabb has a really good point too. You should get in the habit of writing memory allocations as `* var_name = (*)malloc(sizeof() * num_elements)` to avoid those subtle mistakes...even if you are allocating `chars` for a `char*`. Some people bitch about it, but then you end up forgetting to do that when it matters – Tommy Mar 28 '15 at 02:58
0

It seems to be caused by your call to getline in your readline function.

char *line;
size_t len = 0;
...
temp = getline(&line,&len,f);

getline requires line to be NULL (in which case the value of len is ignored) or line must be a pointer returned by malloc, calloc, or realloc. If line is not NULL, and len isn't large enough, line is resized by calling realloc. This is the crucial point: line points to some random address that wasn't returned by malloc, so getline attempts to use realloc to increase the buffer size because 0 bytes is just too small.

You also have a buffer overflow here:

char mtf[3] = "mtf";
...
for(j = 0; j<(x-3); j++){
   fi[j] = argv[1][j];
}
strcat(fi, mtf);

Because you made mtf only 3 bytes in size, it may or may not be followed immediately by a null terminator. When strcat is called, if mtf isn't followed immediately by a 0 byte in memory, you end up with something like sample.mtf\x1b\x01X as the output filename, assuming you don't write too far beyond the end of the fi array to crash the program with a SIGSEGV (segfault). Any of the following will correct it:

char mtf[4] = "mtf";
//OR
char mtf[] = "mtf";
//OR
const char *mtf = "mtf";

However, because fi is only made up of x number of bytes, you'll end up writing to fi[x] with your null terminator that strcat adds. This is a problem because char fi[x]; means you only have array indices 0 to x - 1 available. Fix this part by using x = strlen(argv[1]) + 1.