#1
  1. No Profile Picture
    Registered User
    Devshed Newbie (0 - 499 posts)

    Join Date
    Dec 2012
    Posts
    1
    Rep Power
    0

    Edit an HTML file


    Hey, guys. I need a little help here. I have to edit an HTML text file in C and my first task is to put every tag and the text between them on a new line. So far I have managed to do that every time I find a ">", but I can't do the same thing for the "<" character. Here is the source:
    #include <stdio.h>
    #include <string.h>

    /*
     * Insert string s at the front of string d, in place: afterwards d holds
     * s followed by the previous contents of d.
     *
     * d must point into a buffer with room for strlen(s) more characters;
     * nothing here checks that.
     * NOTE(review): the copy made by strdup() is never freed, so every call
     * leaks it; the strdup() result is also not checked for NULL.
     */
    void strins (char *d, char *s)
    {
    char *aux=strdup(d);   /* save the old tail before it is overwritten */
    strcpy(d,s);
    strcat(d,aux);
    }
    /*
     * For every '>' in a that is not already followed by '\n' or the end of
     * the string, insert string b immediately after it.
     *
     * NOTE(review): realloc() is called after strins() has already written the
     * longer string, and the size passed is strlen(a), which does not count the
     * terminating '\0' - so the buffer is never grown ahead of the insertion.
     * NOTE(review): poz still points into the old block; if realloc() moves the
     * buffer, the following strchr(poz+1, ...) reads through a stale pointer.
     * NOTE(review): a is a by-value parameter, so the pointer returned by
     * realloc() is not seen by the caller.
     */
    void verif1(char *a, char b[10])
    {
    char *poz;
    poz=strchr(a,'>');
    while (poz!=NULL)
    {
    /* a[poz-a+1] is the character right after the '>' (same as poz[1]) */
    if(a[poz-a+1]!='\n' && a[poz-a+1]!='\0')
    {
    strins(a+(poz-a+1),b);
    a=(char *)realloc(a,strlen(a)*sizeof(char));
    }
    poz=strchr(poz+1,'>');
    }
    }
    /*
     * For every '<' in a that is not the first character and is not already
     * preceded by '\n', insert string b in front of it.
     *
     * NOTE(review): the insertion point a+(poz-a-1) is poz-1, i.e. one
     * character BEFORE the character that precedes '<', not directly in front
     * of the '<' itself.
     * NOTE(review): after the insertion the same '<' has moved forward by
     * strlen(b) and is still preceded by the same non-newline character, so
     * strchr(poz+1, '<') finds it again and the condition holds again; with a
     * non-empty b the loop keeps inserting and does not terminate.
     * NOTE(review): as in verif1, realloc() is passed strlen(a) (no room for
     * the '\0'), runs after the write, may invalidate poz, and its result is
     * not visible to the caller.
     */
    void verif2(char *a, char b[10])
    {
    char *poz;
    poz=strchr(a,'<');
    while (poz!=NULL)
    {
    /* a[poz-a-1] is the character right before the '<' (same as poz[-1]) */
    if((poz-a)!=0 && a[poz-a-1]!='\n')
    {
    strins(a+(poz-a-1),b);
    a=(char *)realloc(a,strlen(a)*sizeof(char));
    }
    poz=strchr(poz+1,'<');
    }
    }
    /*
     * Read the whole of "date.in" (reopened as stdin) into a heap buffer, run
     * verif1() and verif2() on it with b = "\n", and print the result.
     *
     * NOTE(review): the return value of freopen() and malloc() is not checked.
     * NOTE(review): malloc()/realloc() are used but only <stdio.h> and
     * <string.h> are included in this listing; they are declared in <stdlib.h>.
     */
    int main()
    {
    freopen("date.in","r",stdin);
    char *a, b[10];
    int i=0;
    /* build the one-character string "\n" in b (i is still 0 here) */
    b[i]='\n';
    b[i+1]='\0';
    /* NOTE(review): fixed 40-byte buffer; the read loop below never checks i
       against this size, so longer input writes past the allocation */
    a=(char *)malloc(40*sizeof(char));
    /* NOTE(review): feof() is tested before each read, so the body also runs
       for the read that hits end-of-file and the EOF value returned by getc()
       is stored in a[] as well */
    while (!feof(stdin))
    {
    a[i]=getc(stdin);
    i++;
    }
    a[i]='\0';
    verif1(a,b);
    verif2(a,b);
    printf("%s \n",a);
    return 0;
    }
    The verif1 function is for the ">" character and verif2 is for "<". Can anybody tell me what might be the problem?
  2. #2
  3. Contributed User
    Devshed Specialist (4000 - 4499 posts)

    Join Date
    Jun 2005
    Posts
    4,417
    Rep Power
    1871
    Allow me to indent your code...
    Code:
    #include <stdio.h>
    #include <string.h>
    
    /*
     * Insert string s at the front of string d, in place: afterwards d holds
     * s followed by the previous contents of d.
     *
     * d must point into a buffer with room for strlen(s) more characters;
     * nothing here checks that.
     * NOTE(review): the copy made by strdup() is never freed, so every call
     * leaks it; the strdup() result is also not checked for NULL.
     */
    void strins(char *d, char *s)
    {
      char *aux = strdup(d);    /* save the old tail before it is overwritten */
      strcpy(d, s);
      strcat(d, aux);
    }
    
    /*
     * For every '>' in a that is not already followed by '\n' or the end of
     * the string, insert string b immediately after it.
     *
     * NOTE(review): realloc() is called after strins() has already written the
     * longer string, and the size passed is strlen(a), which does not count the
     * terminating '\0' - so the buffer is never grown ahead of the insertion.
     * NOTE(review): poz still points into the old block; if realloc() moves the
     * buffer, the following strchr(poz + 1, ...) reads through a stale pointer.
     * NOTE(review): a is a by-value parameter, so the pointer returned by
     * realloc() is not seen by the caller.
     */
    void verif1(char *a, char b[10])
    {
      char *poz;
      poz = strchr(a, '>');
      while (poz != NULL) {
        /* a[poz - a + 1] is the character right after the '>' (same as poz[1]) */
        if (a[poz - a + 1] != '\n' && a[poz - a + 1] != '\0') {
          strins(a + (poz - a + 1), b);
          a = (char *) realloc(a, strlen(a) * sizeof(char));
        }
        poz = strchr(poz + 1, '>');
      }
    }
    
    /*
     * For every '<' in a that is not the first character and is not already
     * preceded by '\n', insert string b in front of it.
     *
     * NOTE(review): the insertion point a + (poz - a - 1) is poz - 1, i.e. one
     * character BEFORE the character that precedes '<', not directly in front
     * of the '<' itself.
     * NOTE(review): after the insertion the same '<' has moved forward by
     * strlen(b) and is still preceded by the same non-newline character, so
     * strchr(poz + 1, '<') finds it again and the condition holds again; with
     * a non-empty b the loop keeps inserting and does not terminate.
     * NOTE(review): as in verif1, realloc() is passed strlen(a) (no room for
     * the '\0'), runs after the write, may invalidate poz, and its result is
     * not visible to the caller.
     */
    void verif2(char *a, char b[10])
    {
      char *poz;
      poz = strchr(a, '<');
      while (poz != NULL) {
        /* a[poz - a - 1] is the character right before the '<' (same as poz[-1]) */
        if ((poz - a) != 0 && a[poz - a - 1] != '\n') {
          strins(a + (poz - a - 1), b);
          a = (char *) realloc(a, strlen(a) * sizeof(char));
        }
        poz = strchr(poz + 1, '<');
      }
    }
    
    /*
     * Read the whole of "date.in" (reopened as stdin) into a heap buffer, run
     * verif1() and verif2() on it with b = "\n", and print the result.
     *
     * NOTE(review): the return value of freopen() and malloc() is not checked.
     * NOTE(review): malloc()/realloc() are used but only <stdio.h> and
     * <string.h> are included in this listing; they are declared in <stdlib.h>.
     */
    int main()
    {
      freopen("date.in", "r", stdin);
      char *a, b[10];
      int i = 0;
      /* build the one-character string "\n" in b (i is still 0 here) */
      b[i] = '\n';
      b[i + 1] = '\0';
      /* NOTE(review): fixed 40-byte buffer; the read loop below never checks i
         against this size, so longer input writes past the allocation */
      a = (char *) malloc(40 * sizeof(char));
      /* NOTE(review): feof() is tested before each read, so the body also runs
         for the read that hits end-of-file and the EOF value returned by getc()
         is stored in a[] as well */
      while (!feof(stdin)) {
        a[i] = getc(stdin);
        i++;
      }
      a[i] = '\0';
      verif1(a, b);
      verif2(a, b);
      printf("%s \n", a);
      return 0;
    }
    Here are some problems.
    1. strins leaks the memory allocated to aux
    2. your realloc calls are shrinking memory, not growing memory. strlen() tells you the length of the string excluding the \0. You need to count the \0 AND allow for the insertion of one more character.

    General comments.
    1. Better variable names than a, b, s would go a long way to improving readability.
    2. Expressions like a[poz - a + 1] can be reduced to poz[1]
    3. Don't cast the return result of realloc/malloc. All it is doing in your case is masking the fact that you've failed to include stdlib.h.
    4. The initial allocation of 40 chars is inadequate for any meaningful HTML document.
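    5. Your use of feof() is wrong - see here. feof() only becomes true after a read has already failed, so your loop runs one extra time and stores the EOF value returned by getc() in the buffer. Test the return value of getc() instead.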
    6. The b variable in main could be reduced to char *b = "\n";
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper

IMN logo majestic logo threadwatch logo seochat tools logo