FAQ Bogofilter

Unsure · réellement

    bogofilter -s < spam.mbox
    bogofilter -n < ham.mbox
    bogominitrain.pl -fnv ~/.bogofilter ham.mbox spam.mbox '-o 0.9,0.3'
    randomtrain -s spam.mbox -n ham.mbox
    #! /bin/sh
    #  class3 -- classify one message as bad, good or unsure
    #
    #  Usage: class3 [bogofilter options] < message
    #
    #  The message read from stdin is scored by bogofilter and appended to
    #  corpus.bad, corpus.good or corpus.unsure (in the current directory)
    #  when bogofilter exits with status 0, 1 or 2 respectively.  Any other
    #  status leaves the corpus files untouched.
    msg=msg.$$

    # Always remove the scratch copy, including when the script is interrupted.
    trap 'rm -f "$msg"' EXIT
    trap 'exit 1' HUP INT TERM

    # The message is needed twice (scoring, then filing), so keep a copy.
    cat >"$msg" || exit 1

    # "$@" hands every option to bogofilter as its own word, unlike $*.
    bogofilter "$@" <"$msg"
    case $? in
        0) cat "$msg" >>corpus.bad ;;
        1) cat "$msg" >>corpus.good ;;
        2) cat "$msg" >>corpus.unsure ;;
    esac
    #! /bin/sh
    # classify -- run every message of an mbox file through class3
    #
    # Usage: classify mbox-file [bogofilter options]
    #
    # formail -s splits the mbox and starts class3 once per message; the
    # remaining arguments are passed on unchanged as bogofilter options.
    if [ $# -lt 1 ]; then
        echo "usage: classify mbox-file [bogofilter options]" >&2
        exit 2
    fi
    src=$1
    shift
    # Quoting keeps file names and options containing spaces intact.
    formail -s class3 "$@" <"$src"
    classify spam.mbox [bogofilter options]
    bogofilter -s < corpus.good
    rm -f corpus.*
    classify ham.mbox [bogofilter options]
    bogofilter -n < corpus.bad
    rm -f corpus.*
    bogofilter -M -s -I ~/mail/Spam
    bogofilter -M -n -I ~/mail/NonSpam
    bogofilter -s -B ~/Maildir/.Spam
    bogofilter -n -B ~/Maildir/.NonSpam
    bogofilter -M -Ns -I ~/mail/Missed_Spam
    bogofilter -M -Sn -I ~/mail/False_Spam
    bogofilter -s -B ~/Maildir/.Missed_Spam
    bogofilter -n -B ~/Maildir/.False_Spam
    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db
    mailtool copy /full/path/to/mail.mbox '#driver.unix//full/path/to/mbox'
    # Feed every entry matched by the maildir glob through formail and append
    # the result to one mbox file.
    # NOTE(review): per the formail manual, "-I Status:" with an empty value
    # should drop any existing Status: header -- confirm.
    # NOTE(review): the glob matches entries directly under the given path; a
    # maildir normally keeps its messages in cur/ and new/ -- confirm the path.
    for MSG in /full/path/to/maildir/* ; do 
        formail -I Status: < "$MSG" >> /full/path/to/mbox
    done
  X-Bogosity: Ham, tests=bogofilter, spamicity=0.500000
    X-Bogosity: Ham, tests=bogofilter, spamicity=0.500000
      int  cnt    prob   spamicity  histogram
     0.00   29  0.000209  0.000052  #############################
     0.10    2  0.179065  0.003425  ##
     0.20    2  0.276880  0.008870  ##
     0.30   18  0.363295  0.069245  ##################
     0.40    0  0.000000  0.069245
     0.50    0  0.000000  0.069245
     0.60   37  0.667823  0.257307  #####################################
     0.70    5  0.767436  0.278892  #####
     0.80   13  0.836789  0.334980  #############
     0.90   32  0.984903  0.499835  ################################
  X-Bogosity: Ham, tests=bogofilter, spamicity=0.500000
                        n    pgood     pbad      fw     U
  "which"              10  0.208333  0.000000  0.000041 +
  "own"                 7  0.145833  0.000000  0.000059 +
  "having"              6  0.125000  0.000000  0.000069 +
  ...
  "unsubscribe.asp"     2  0.000000  0.095238  0.999708 +
  "million"             4  0.000000  0.190476  0.999854 +
  "copy"                5  0.000000  0.238095  0.999883 +
  N_P_Q_S_s_x_md      138  0.00e+00  0.00e+00  5.00e-01
                           1.00e-03  4.15e-01  0.100
    #### CUTOFF Values
    #
    #    both ham_cutoff and spam_cutoff are allowed.
    #    setting ham_cutoff to a non-zero value will
    #    enable tri-state results (Spam/Ham/Unsure).
    #
    #ham_cutoff  = 0.45
    #spam_cutoff = 0.99
    #
    #    for two-state classification:
    #
    ## ham_cutoff = 0.00
    ## spam_cutoff= 0.99
    ## spamicity_tags = Yes, No, Unsure
    if header contains "X-Bogosity: Spam", put in Spam folder
    if header contains "X-Bogosity: Unsure", put in Unsure folder
    #### SPAM_SUBJECT_TAG
    #
    #	tag added to "Subject: " line for identifying spam or unsure
    #    default is to add nothing.
    #
    ##spam_subject_tag=***SPAM***
    ##unsure_subject_tag=???UNSURE???
    if subject contains "***SPAM***", put in Spam folder
    if subject contains "???UNSURE???", put in Unsure folder
    BOGOFILTER     = "/usr/bin/bogofilter"
    BOGOFILTER_DIR = "training"
    SPAMASSASSIN  = "/usr/bin/spamassassin"

    :0 HBc
    * ? $SPAMASSASSIN -e
    #spam yields non-zero
    #non-spam yields zero
    | $BOGOFILTER -n -d $BOGOFILTER_DIR
    #else (E)
    :0Ec
    | $BOGOFILTER -s -d $BOGOFILTER_DIR

    :0fw
    | $BOGOFILTER -p -e

    :0:
    * ^X-Bogosity:.Spam
    spam

    :0:
    * ^X-Bogosity:.Ham
    non-spam
## Efface silencieusement tous les mails en langue asiatique
    UNREADABLE='[^?"]*big5|iso-2022-jp|ISO-2022-KR|euc-kr|gb2312|ks_c_5601-1987'
    :0:
    * 1^0 $ ^Subject:.*=\?($UNREADABLE)
    * 1^0 $ ^Content-Type:.*charset="?($UNREADABLE)
    spam-unreadable

    :0:
    * ^Content-Type:.*multipart
    * B ?? $ ^Content-Type:.*^?.*charset="?($UNREADABLE)
    spam-unreadable
    bf_compact ~/.bogofilter wordlist.db
    cd ~/.bogofilter
    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db
    wordlist R,utilisateur,~/wordlist.db,1
    wordlist R,systeme,/var/spool/bogofilter/wordlist.db,1
    wordlist R,utilisateur,~/wordlist.db,2
    wordlist R,systeme,/var/spool/bogofilter/wordlist.db,3
    wordlist R,utilisateur,~/wordlist.db,5
    wordlist R,systeme,/var/spool/bogofilter/wordlist.db,4
    wordlist I,ignore,~/ignorelist.db,7
    wordlist R,systeme,/var/spool/bogofilter/wordlist.db,8
  echo ignorez.moi | bogoutil -l ~/ignorelist.db
    db_verify wordlist.db
    bogoutil -d wordlist.db | bogoutil -l wordlist.new.db
    db_dump -r wordlist.db > wordlist.txt
    db_load wordlist.new.db < wordlist.txt
    bogoutil -d wordlist.db > wordlist.raw.txt
    iconv -f iso-8859-1 -t utf-8 < wordlist.raw.txt > wordlist.utf8.txt
    bogoutil -l wordlist.db.new < wordlist.utf8.txt
    bogoutil --unicode=yes -m wordlist.db
    bogoutil -d wordlist.db > wordlist.utf8.txt
    iconv -f utf-8  -t iso-8859-1 < wordlist.utf8.txt > wordlist.raw.txt
    bogoutil -l wordlist.db.new < wordlist.raw.txt
    bogoutil --unicode=no -m wordlist.db
    cd ~/.bogofilter
    bogoutil -d wordlist.db > wordlist.txt
    mv wordlist.db wordlist.db.old
    bogoutil --db-transaction=yes -l wordlist.db < wordlist.txt
    rm wordlist.db.old wordlist.txt
    cd ~/.bogofilter
    bogoutil -d wordlist.db > wordlist.txt
    mv wordlist.db wordlist.db.old
    rm -f log.?????????? __db.???
    bogoutil --db-transaction=no -l wordlist.db < wordlist.txt
  bogoutil --db-recover /votre/repertoire/bogofilter
    ls -lh $BOGOFILTER_DIR/wordlist.db
    postconf | grep mailbox_size_limit
    postconf -e mailbox_size_limit=73000000
    bogoutil -d wordlist.db | \
    awk '{print $1 " " $2 " 0"}' | grep -v " 0 0" | \
    bogoutil -l wordlist.new.db
    bogoutil -d wordlist.db | \
    awk '{print $1 " 0 " $3}' | grep -v " 0 0" | \
    bogoutil -l wordlist.new.db
    $ cd build_unix
    $ sh ../dist/configure
    $ make
    # make install
    $ ./configure --with-libdb-prefix=/usr/local/BerkeleyDB-4.2
    $ make
    # make install-strip
    $ LD_LIBRARY_PATH=/usr/lib:/usr/local/lib:/usr/local/BerkeleyDB-4.2
        $ export LD_LIBRARY_PATH
    # pkg_add -r portupgrade cvsup
    # portupgrade -N bogofilter
    # env CPPFLAGS=-I/usr/local/include/db3 LIBS=-ldb3 LDFLAGS=-L/usr/local/lib ./configure
    # S: pipe the message to "bogofilter -s" (register as spam), then save it to =junkmail.
    macro index S "|bogofilter -s\ns=junkmail"  "Apprendre comme spam et sauvegarder dans junk"
    macro pager S "|bogofilter -s\ns=junkmail"  "Apprendre comme spam et sauvegarder dans junk"
    # H: pipe the message to "bogofilter -n" (register as non-spam), then start a save prompt.
    macro index H "|bogofilter -n\ns="          "Apprendre comme non-spam et sauvegarder"
    macro pager H "|bogofilter -n\ns="          "Apprendre comme non-spam et sauvegarder"
    condition:
    * test "bogofilter < %F"
    action:
    * move "#mh/VOTRE_BOITE_A_SPAM"
    Mark as ham / spam:
    * bogofilter -n -v -B "%f" (mark ham)
    * bogofilter -s -v -B "%f" (mark spam)
    #!/bin/sh
    # Pass to "bogofilter -bNsv" the path of every message file that appeared
    # in the spam folders since the previous run.
    #
    #   $CONFIGDIR/spamdirs     folders to scan, one directory per line
    #   $CONFIGDIR/lastbogorun  time stamp file touched at the end of each run
    CONFIGDIR=~/.bogofilter
    SPAMDIRS="$CONFIGDIR/spamdirs"
    MARKFILE="$CONFIGDIR/lastbogorun"

    # Read the list line by line so that folder names containing spaces
    # survive; bogofilter receives the file names on stdin.
    while read -r D; do
        [ -n "$D" ] || continue
        if [ -e "$MARKFILE" ]; then
            # "!" is the portable spelling of GNU find's "-not".
            find "$D" -type f -newer "$MARKFILE" ! -name ".sylpheed*"
        else
            # First run: no time stamp yet, so every message is new.
            find "$D" -type f ! -name ".sylpheed*"
        fi
    done <"$SPAMDIRS" | bogofilter -bNsv
    touch "$MARKFILE"
    Condition:
        header "X-Bogosity" matchcase "Spam"
    Action:
        move "#mh/Mailbox/Spam"
    Condition:
        header "X-Bogosity" matchcase "Unsure"
    Action:
        move "#mh/Mailbox/Unsure"
    Register Spam:
        bogofilter -s < "%f"

    Register Ham:
        bogofilter -n < "%f"

    Unregister Spam:
        bogofilter -S < "%f"

    Unregister Ham:
        bogofilter -N < "%f"
    BogoTest -vv:
        bogofilter -vv < "%f"

    BogoTest -vvv:
        bogofilter -vvv < "%f"
;; Lutte contre les pourriels (via bogofilter)
;;
(require 'vm-bogofilter)

;; Raccourcis-clavier pour bogofilter
;; K (shift-k) : pourriel
;; C (shift-c) : message correct
(define-key vm-mode-map "K" 'vm-bogofilter-is-spam)
(define-key vm-mode-map "C" 'vm-bogofilter-is-clean)

FAQ Bogofilter

Qu'est-ce que Bogofilter?

Bogo-quoi?

Comment fonctionne Bogofilter?

Listes de diffusion

Comment débuter l'apprentissage de Bogofilter?

Comparaison de ces méthodes

Comment lancer l'entrainement avec des formats mbox et maildir?

Entrainement initial avec un format mbox:

Entrainement initial avec un format maildir:

Entrainement sur erreur avec un format mbox:

Entrainement sur erreur avec un format maildir:

Comment puis-je conserver une bonne acuité?

Quels formats de fichier Bogofilter comprend-il?

Quelle est la signification de la sortie verbeuse de Bogofilter?

Qu'est-ce que le mode Unsure?

Qu'est-ce que "l'entrainement sur erreur" et "l'entrainement sur épuisement"?

Que fait l'option '-u' (autoupdate)?

Comment puis-je utiliser SpamAssassin pour l'apprentissage de Bogofilter?

Comment traiter les spams asiatiques?

Comment compacter les bases?

Comment faire une requête manuelle sur la base?

Puis-je utiliser plusieurs listes de mots?

Puis-je indiquer à Bogofilter d'ignorer certains tokens?

Comment puis-je faire pour convertir mes bases de mots séparées au format combiné?

Comment faire si ma liste de mots est corrompue?

Comment puis-je convertir ma base de mots de/vers l'unicode?

Comment passer du mode non-transactionnel au mode transactionnel?

Comment passer du mode transactionnel au mode non transactionnel?

Pourquoi Bogofilter meurt-il après avoir affiché "Lock table is out of available locks" ou "Lock table is out of available object entries"?

Comment se fait-il que j'obtienne des messages DB_PAGE_NOTFOUND?

Pourquoi obtient-on "Berkeley DB library configured to support only DB_PRIVATE environments" ou "Berkeley DB library configured to support only private environments"?

Bogofilter peut-il fonctionner en environnement multi-utilisateurs?

Puis-je partager une liste de mots par NFS?

Pourquoi Bogofilter fournit des codes de retour tels que 0 et 256 quand il est lancé à l'intérieur d'un programme?

Pourquoi mes scripts échouent depuis que j'ai mis à jour?

Pourquoi, depuis que j'ai changé de version, Bogofilter fonctionne-t-il moins bien?

Avec une wordlist combinée, comment puis-je détruire tous les tokens spams (ou les non-spams)?

Comment faire fonctionner Bogofilter sur Solaris, BSD, etc.?

Sur Solaris

Sur FreeBSD

Sur HP-UX

Puis-je utiliser la commande make sur mon système?

Comment compiler Bogofilter pour un utilisateur non-root ou avec un préfixe d'installation non standard?

Comment dois-je compiler Bogofilter avec les patches?

Comment rendre les exécutables plus petits?

datastore_db.c ne se compile pas!

Avec quel programme de messagerie Bogofilter fonctionne-t-il?

Comment utiliser Bogofilter avec Mutt?

Comment utiliser Bogofilter avec Sylpheed Claws?

Comment utiliser Bogofilter avec VM (un logiciel de messagerie sous Emacs)?

Comment utiliser Bogofilter avec MH-E (l'interface Emacs pour le système de mail MH)?

Pourquoi obtient-on "Berkeley DB library configured to support only DB_PRIVATE environments" ou
"Berkeley DB library configured to support only private environments"?