diff --git a/README.md b/README.md
index 552a70b43ef1bb1535f327d088cd1d49335d13dc..4ee10d3a2ef3ab9a989f8b5cfb9f6d8b3d531a81 100644
--- a/README.md
+++ b/README.md
@@ -7,26 +7,27 @@ Tools used within the workflow :
 
 The file architecture for the workflow is as follow :
 ```
-pan1c
+Pan1c/
 â”œâ”€â”€ config.yaml
-â”œâ”€â”€ copyHaplotypes.sh
-â”œâ”€â”€ data
-â”‚Â Â  â””â”€â”€ haplotypes
+â”œâ”€â”€ README.md
 â”œâ”€â”€ runSnakemake.sh
 â”œâ”€â”€ scripts
-â”‚Â Â  â”œâ”€â”€ bin_split.py
-â”‚Â Â  â””â”€â”€ statsAggregation.py
-â”œâ”€â”€ Snakefile
-â””â”€â”€ README.md
+â”‚   â”œâ”€â”€ bin_split.py
+â”‚   â”œâ”€â”€ getPanachePAV.sh
+â”‚   â”œâ”€â”€ inputClustering.py
+â”‚   â”œâ”€â”€ ragtagChromInfer.sh
+â”‚   â””â”€â”€ statsAggregation.py
+â””â”€â”€ Snakefile
 ```
 # Prepare your data
-This workflow can take chromosome level assemblies as well as contig level assembly.  
+This workflow can take chromosome level assemblies as well as contig level assemblies.  
 **Fasta files need to be compressed** using bgzip2 (included in [PanGeTools](https://forgemia.inra.fr/alexis.mergez/pangetools)).
-Sequence names **should follow this pattern** : `<haplotype name>#<ctg|chr name>`. (`CHM13#chr01` for example).  
-Make your input file read only to prevent snakemake to mess with them.  
+Records id **must follow this pattern** : `<haplotype name>#<ctg|chr name>`. (`CHM13#chr01` for example).  
+Because of the clustering step, this pattern is only needed for the reference assembly.
+Input files should be read only to prevent snakemake to mess with them (which seems to happen in some rare cases).  
 
 # Usage
-Create `data/haplotypes` and put all haplotypes in it.
-Change reference name and apptainer image path in `config.yml`.  
-Change variables in `runSnakemake.sh` to match your needs (job name, mail, etc...).
-Run `runSnakemake.sh` and wait !  
\ No newline at end of file
+Clone this repo and create `data/haplotypes`. Place all your haplotypes in it.
+Change the reference name and the apptainer image path in `config.yaml`.  
+Finally, change variables in `runSnakemake.sh` to match your needs (threads, memory, job name, mail, etc...).
+Go in the root directory of the repo and run `sbatch runSnakemake.sh` !
\ No newline at end of file
diff --git a/Snakefile b/Snakefile
index 45335d568e6411fa5144b0079e7dda3eb960dae9..a2eef6fd6651c937ca4de78a651f28e853283e26 100644
--- a/Snakefile
+++ b/Snakefile
@@ -4,7 +4,6 @@ import os
 import numpy as np
 
 SAMPLES = np.unique([os.path.basename(f).split('.fa')[0] for f in os.listdir("data/haplotypes/")])
-print(SAMPLES)
 nHAP = len(SAMPLES)
 
 rule all:
@@ -44,34 +43,33 @@ rule samtools_index:
         "apptainer run --app samtools {params.apppath}/PanGeTools.sif "
         "faidx {input.fa}"
 
-rule wfmash_premapping:
-    # Mapping haplotypes contigs/chr on a reference 
+rule ragtag_scaffolding:
+    # Scaffold input haplotype against the reference to infer chromosome scale sequences
     input:
         ref="data/haplotypes/"+config['reference'],
         reffai="data/haplotypes/"+config['reference']+".fai",
         refgzi="data/haplotypes/"+config['reference']+".gzi",
-        fa="data/input_haplotypes/aggregated.fa.gz",
-        fagzi="data/input_haplotypes/aggregated.fa.gz.gzi",
-        fafai="data/input_haplotypes/aggregated.fa.gz.fai"
+        fa="data/haplotypes/{haplotype}.fa.gz"
     output:
-        protected("data/premapping/premapping.paf")
-    params: 
-        wfmash=config['wfmash.params'],
+        "data/ragtag_hap/{haplotype}.ragtagged.fa.gz"
+    threads: 8
+    params:
         apppath=config["app.path"]
-    threads: workflow.cores
-    log: 
-        time="logs/premapping/premapping.time.log"
     shell:
         """
-        /usr/bin/time -v -o {log.time} \
-            apptainer run --app wfmash {params.apppath}/PanGeTools.sif \
-            -t {threads} {params.wfmash} {input.ref} {input.fa} > {output}
+        bash scripts/ragtagChromInfer.sh \
+            -d $(dirname {output})/{wildcards.haplotype} \
+            -a {params.apppath} \
+            -t {threads} \
+            -r {input.ref} \
+            -q {input.fa} \
+            -o {output}
         """
+
 checkpoint clustering:
     # Reading alignment file to create bins for each chromosome
     input:
-        paf="data/premapping/premapping.paf",
-        fa="data/input_haplotypes/aggregated.fa.gz"
+        expand('data/ragtag_hap/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
     output:
         directory("data/chrInputs")
     threads: workflow.cores
@@ -80,7 +78,7 @@ checkpoint clustering:
     shell:
         """
         mkdir -p {output}
-        python scripts/bin_split.py --paf {input.paf} --dir {output} --fasta {input.fa}
+        apptainer run {params.apppath}/pytools.sif python scripts/inputClustering.py --fasta {input} --output {output}
         for file in {output}/*.fa; do
             apptainer run --app bgzip {params.apppath}/PanGeTools.sif -@ {threads} $file
         done
diff --git a/scripts/inputClustering.py b/scripts/inputClustering.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ceed3406048705634322cc7c10549314ca4419e
--- /dev/null
+++ b/scripts/inputClustering.py
@@ -0,0 +1,89 @@
+"""
+Clustering script for Pan1c workflow
+
+Given a list of fasta files with records ids following the pattern <haplotype>#<chromosome id>,
+the script clusters sequence by chromosome and returns several fasta. 
+Each output fasta contains sequences related to one chromosome only.
+
+@author: alexis.mergez@inrae.fr
+@version: 1.0 
+"""
+
+from Bio import SeqIO
+import numpy as np
+import argparse
+import gzip
+import os
+
+## Arguments
+arg_parser = argparse.ArgumentParser(description='Pan1c input haplotype clustering')
+arg_parser.add_argument(
+    "--fasta",
+    "-f",
+    nargs="+",
+    dest = "fastafiles",
+    required = True,
+    help = "Fasta file(s)"
+    )
+arg_parser.add_argument(
+    "--output",
+    "-o",
+    dest = "outdir",
+    required = True,
+    help = "Output directory"
+    )
+arg_parser.add_argument(
+    "--debug",
+    "-d",
+    action="store_true",
+    dest = "debug",
+    help = "Show debug"
+    )
+args = arg_parser.parse_args()
+
+## Toolbox
+def getChr(name):
+    """
+    Simply returns the chromosome name of a given fasta record
+    """
+    return name.split('#')[-1]
+
+## Main script
+seqDict = {}
+
+# Parsing fasta files
+for filename in args.fastafiles:
+    
+    # Reading .fa.gz file and adding records to seqDict
+    with gzip.open(filename, "rt") as handle:
+        sequences = SeqIO.parse(handle, "fasta")
+        
+        for record in sequences:
+            seqDict[record.id] = record
+
+# Inferring the list of available chromosomes from the sequence record ids
+chrList = np.unique([
+    getChr(recordid) for recordid in seqDict.keys()
+])
+
+# Clustering records based on chromosome tag in records id
+chrSeq = {}
+
+for recordid, record in seqDict.items():
+    # Getting the chromosome id
+    _chrname = getChr(recordid)
+
+    # If not encountered before, create a key for this chromosome in chrSeq
+    if _chrname not in list(chrSeq.keys()):
+        chrSeq[_chrname] = []
+
+    # Add the record to the chromosome bin in chrSeq
+    chrSeq[_chrname].append(record)
+
+# Debug purpose print
+if args.debug: print(chrSeq.keys())
+
+# Writing chromosome specific fasta file
+for chrName in chrSeq.keys():
+    with open(os.path.join(args.outdir, f"{chrName}.fa"), "w") as output_handle:
+        SeqIO.write(chrSeq[chrName], output_handle, "fasta")
\ No newline at end of file
diff --git a/scripts/ragtagChromInfer.sh b/scripts/ragtagChromInfer.sh
new file mode 100755
index 0000000000000000000000000000000000000000..bb529b6aaccc95b767e0b61bd31b754487e539ba
--- /dev/null
+++ b/scripts/ragtagChromInfer.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Produce a Scaffolded version of the haplotype using RagTag
+
+# Initializing arguments
+tmpdir=""       # Directory used by ragtag
+appdir=""       # Directory containing apptainer images
+threads=""      # Threads count
+inputref=""     # Reference fasta
+inputquery=""   # Query fasta
+output=""       # Output fasta
+
+## Getting arguments
+while getopts "d:a:t:r:q:o:" option; do
+    case "$option" in
+        d) tmpdir="$OPTARG";;
+        a) appdir="$OPTARG";;
+        t) threads="$OPTARG";;
+        r) inputref="$OPTARG";;
+        q) inputquery="$OPTARG";;
+        o) output="$OPTARG";;
+        \?) echo "Usage: $0 [-d tmpdir] [-a appdir] [-t threads] [-r inputref] [-q inputquery] [-o output]" >&2
+            exit 1;;
+    esac
+done
+
+## Main script
+hap=$(basename $inputquery .fa.gz)
+ref=$(basename $inputref .fa.gz)
+mkdir -p $tmpdir
+
+# Running ragtag scaffold
+apptainer run $appdir/pytools.sif ragtag.py scaffold \
+    -t $threads -o $tmpdir $inputref $inputquery
+
+# Renaming sequence according to naming scheme
+grep ">${ref}#chr*" -A1 $tmpdir/ragtag.scaffold.fasta | \
+    sed "s/${ref}#chr\([^_]*\)_RagTag/${hap}#chr\1/g" > $tmpdir/${hap}.ragtagged.fa
+
+# Compressing fasta
+apptainer run --app bgzip $appdir/PanGeTools.sif \
+    -@ $threads $tmpdir/${hap}.ragtagged.fa
+
+# Moving fa.gz to output dir
+mv $tmpdir/${hap}.ragtagged.fa.gz $output