diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b70e77af8f95fbabadccba9b585b1368b9efce64..636d9c75dcb3907911b74a291911a2020bb0f322 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -40,6 +40,32 @@ create_graph: paths: - workflow.svg +deep_run_workflow: + stage: test + image: + name: kaczmarj/apptainer:latest + entrypoint: [""] + tags: + - custom + script: + ## Downloading apps + - mkdir appimgs + - apptainer build appimgs/PanGeTools.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangetools/pangetools:latest + - apptainer build appimgs/pan1c-env.sif oras://registry.forgemia.inra.fr/alexis.mergez/pan1capps/pan1cenv:latest + - apptainer build appimgs/pggb.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangratools/pggb:latest + - apptainer build appimgs/pan1c-box.sif oras://registry.forgemia.inra.fr/alexis.mergez/pan1capps/pan1cbox:latest + + ## Running workflow + - mkdir -p data/haplotypes + - cp example/*.fa.gz data/haplotypes + - HOME=$(pwd) + - rm config.yaml && mv example/config_CICD.yaml config.yaml + - apptainer run --bind $HOME:/root appimgs/pan1c-box.sif snakemake -c 4 + artifacts: + paths: + - output/pan1c.pggb.03SC_CICD.gfa + - output/pan1c.pggb.03SC_CICD.workflow.stats.tsv + allow_failure: true diff --git a/README.md b/README.md index 25129741dc8860d82b659d5e1b133db4baf951de..eab2dc396127232bc3ffbc170fc70aa968d55c12 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# Pan1c workflow +# Pan1c (Pangenome at chromosome level) workflow -Pan1c *(Pangenome 1 Chromosome)* : a snakemake workflow for creating a pangenome at a chromosomic scale. +Pan1c : a snakemake workflow for creating pangenomes at chromosome scale. Tools used within the workflow : - PanGeTools : https://forgemia.inra.fr/alexis.mergez/pangetools - PanGraTools : https://forgemia.inra.fr/alexis.mergez/pangratools @@ -10,12 +10,13 @@ The file architecture for the workflow is as follows : ``` Pan1c/ ├── config.yaml +├── data +│  └── haplotypes +│ ├── ref.hap<x>.fa.gz +│ ├── samp1.hap<x>.fa.gz +│ └── ... ├── example -│  ├── CICC1445.fa.gz -│  ├── config_CICD.yaml -│  ├── R64.fa.gz -│  ├── SX2.fa.gz -│  └── workflow.svg +│  └── ... ├── getApps.sh ├── README.md ├── runSnakemake.sh @@ -33,10 +34,14 @@ This DAG shows the workflow for a pangenome of `Arabidopsis thaliana` using the `  # Prepare your data -This workflow can take chromosome level assemblies as well as contig level assemblies. +This workflow can take chromosome-level as well as contig-level assemblies, but requires a reference assembly. **Fasta files need to be compressed** using **bgzip** (included in [PanGeTools](https://forgemia.inra.fr/alexis.mergez/pangetools)). -Records id **must follow this pattern** : `<haplotype name>#<ctg|chr name>`. (`CHM13#chr01` for example where the fasta file is named CHM13.fa.gz). -Because of the clustering step, this pattern is only needed for the reference assembly. +Sequence names of the reference **must follow this pattern** : `<sample>#<haplotype>#<contig or chromosome name>`. +For example, CHM13 chromosomes (haploid) must be named `CHM13#1#chr..`. Only the reference needs to follow this pattern for its sequence names. Other haplotypes' sequences will be renamed based on the reference and their respective fasta file names. +Fasta files **must also follow a naming pattern** : `<sample>.hap<haplotype>.fa.gz`. Once again with CHM13, the fasta file should be named : `CHM13.hap1.fa.gz`. + +See [PanSN](https://github.com/pangenome/PanSN-spec) for more info on sequence naming. 
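As a quick sanity check, a snippet along these lines (hypothetical, not shipped with Pan1c; `REFERENCE` stands for the `reference` value of your `config.yaml`) can verify both naming conventions before launching snakemake :

```python
import gzip
import os
import re

HAP_DIR = "data/haplotypes"
REFERENCE = "CHM13.hap1.fa.gz"  # assumption: matches 'reference' in config.yaml

# Fasta files must be named <sample>.hap<haplotype>.fa.gz
file_pattern = re.compile(r"^[^.#]+\.hap\d+\.fa\.gz$")
# Reference records must be named <sample>#<haplotype>#<contig or chromosome name> (PanSN-like)
seq_pattern = re.compile(r"^[^#]+#\d+#\S+$")

for fname in os.listdir(HAP_DIR):
    if not file_pattern.match(fname):
        print(f"[warn] {fname} does not match <sample>.hap<haplotype>.fa.gz")

with gzip.open(os.path.join(HAP_DIR, REFERENCE), "rt") as handle:
    for line in handle:
        if line.startswith(">"):
            seqname = line[1:].split()[0]
            if not seq_pattern.match(seqname):
                print(f"[warn] reference record '{seqname}' does not follow the expected pattern")
```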
+ > Note : Input files should be read-only to prevent snakemake from messing with them (which seems to happen in some rare cases). # Download apptainer images diff --git a/Snakefile b/Snakefile index 72fa1e82fcf8ec100466251da1daed494fc23307..260d97dc1662f863105d4ebc1502d8bdc58e7d60 100644 --- a/Snakefile +++ b/Snakefile @@ -5,6 +5,7 @@ import numpy as np import gzip import re +# Getting the list of haplotypes SAMPLES = np.unique([ os.path.basename(f).split('.fa')[0] for f in os.listdir("data/haplotypes/") @@ -16,13 +17,25 @@ nHAP = len(SAMPLES) with gzip.open("data/haplotypes/"+config['reference'], "r") as handle: CHRLIST = [line.decode().split("#")[-1].split('\n')[0] for line in handle.readlines() if line.decode()[0] == ">"] +def which_post_analysis(): + ## Simple function to configure which parts of the workflow need to be run + post_analysis_inputs = [ # Default post-analysis steps + "output/pan1c.pggb."+config['name']+".workflow.stats.tsv", + "output/figures", + "output/panacus.reports" + ] + + # Optional post-analysis steps + if config["get_PAV"] == "True": + post_analysis_inputs.append("output/pav.matrices") + + return post_analysis_inputs + rule all: input: "output/pan1c.pggb."+config['name']+".gfa", - "output/pan1c.pggb."+config['name']+".chromGraph.stats.tsv", - "output/figures", - "output/pav.matrices", - "output/pan1c.pggb."+config['name']+".stats.tsv" + "output/pan1c.pggb."+config['name']+".chrGraph.stats.tsv", + which_post_analysis() rule samtools_index: # Using samtools faidx to index compressed fasta @@ -177,7 +190,7 @@ rule aggregate_stats: input: "output/stats/" output: - "output/pan1c.pggb."+config['name']+".chromGraph.stats.tsv" + "output/pan1c.pggb."+config['name']+".chrGraph.stats.tsv" params: apppath=config['app.path'], panname=config['name'] @@ -207,34 +220,37 @@ rule get_pav: rule pggb_log_compression: input: - flag="output/pan1c.pggb."+config['name']+".chromGraph.stats.tsv" + flag="output/pan1c.pggb."+config['name']+".chrGraph.stats.tsv" output: tar="logs/pan1c.pggb."+config['name']+".logs.tar.gz", - chrLen="logs/pan1c.pggb."+config['name']+".chrInputLen.tsv" shell: """ cd logs/pggb tar -zcvf ../../{output.tar} * cd ../.. 
+ """ - if [ -f {output.chrLen} ]; then - rm {output.chrLen} - fi - - for chrFasta in data/chrInputs/*.fa.gz; do - filename=$(basename $chrFasta .fa.gz) - num_bases=$(zgrep -v '^>' $chrFasta | tr -d '\\n' | wc -c) - echo "$filename\t$num_bases" >> {output.chrLen} - done +rule pggb_input_stats: + input: + flag="output/pan1c.pggb."+config['name']+".chrGraph.stats.tsv" + output: + "data/chrInputs/pan1c.pggb."+config['name']+".chrInput.stats.tsv" + params: + apppath=config['app.path'], + panname=config['name'] + shell: + """ + apptainer run {params.apppath}/pan1c-env.sif python scripts/chrInputStats.py \ + -f data/chrInputs/*.fa.gz -o {output} -p {params.panname} """ rule workflow_statistics: input: tar="logs/pan1c.pggb."+config['name']+".logs.tar.gz", - chrLen="logs/pan1c.pggb."+config['name']+".chrInputLen.tsv", - chrGraphStats="output/pan1c.pggb."+config['name']+".chromGraph.stats.tsv" + chrInputStats="data/chrInputs/pan1c.pggb."+config['name']+".chrInput.stats.tsv", + chrGraphStats="output/pan1c.pggb."+config['name']+".chrGraph.stats.tsv" output: - tsv="output/pan1c.pggb."+config['name']+".stats.tsv", + tsv="output/pan1c.pggb."+config['name']+".workflow.stats.tsv", dir=directory("output/pggb.usage.figs") params: apppath=config['app.path'] @@ -242,5 +258,24 @@ rule workflow_statistics: """ mkdir -p {output.dir} apptainer run {params.apppath}/pan1c-env.sif python scripts/workflowStats.py \ - -i {input.tar} -l {input.chrLen} -g {input.chrGraphStats} -o {output.tsv} -f {output.dir} - """ \ No newline at end of file + -i {input.tar} -c {input.chrInputStats} -g {input.chrGraphStats} -o {output.tsv} -f {output.dir} + """ + +rule panacus_stats: + input: + "data/chrGraphs/graphsList.txt" + output: + directory("output/panacus.reports") + params: + apppath=config['app.path'], + panname=config['name'], + refname=config['reference'] + threads: 8 + run: + shell("mkdir {output}") + # Getting the list of graphs + with open(input[0]) as handle: + graphList = [graph.rstrip("\n") for graph in handle.readlines()] + # Iterating over graphs + for graph in graphList: + shell("bash scripts/getPanacusHG.sh -g {graph} -r $(basename {params.refname} .fa.gz) -d data/chrGraphs/$(basename {graph} .gfa) -o {output}/$(basename {graph} .gfa).histgrowth.html -a {params.apppath} -t {threads}") diff --git a/config.yaml b/config.yaml index 190b4bb7316bb49454f97d36999fcfe6536ebe17..7c8337f53157be50a0559a47559cc488fa76a661 100644 --- a/config.yaml +++ b/config.yaml @@ -1,6 +1,7 @@ name: '02H' -reference: 'CHM13v2.0.fa.gz' +reference: 'CHM13.hap1.fa.gz' app.path: '/home/amergez/work/apptainer' pggb.params: '-X --skip-viz' odgi.1Dviz.params: '-x 500 -b' -odgi.pcov.params: '-x 500 -O' \ No newline at end of file +odgi.pcov.params: '-x 500 -O' +get_PAV: 'False' diff --git a/example/CICC1445.fa.gz b/example/CICC1445.hap1.fa.gz similarity index 100% rename from example/CICC1445.fa.gz rename to example/CICC1445.hap1.fa.gz diff --git a/example/R64.fa.gz b/example/R64.hap1.fa.gz similarity index 93% rename from example/R64.fa.gz rename to example/R64.hap1.fa.gz index 11e4e68704043afddb9f15926d30a25e8e934b07..dcdd7ed5bb290fdbf634c3156fc31aecd57bd826 100644 Binary files a/example/R64.fa.gz and b/example/R64.hap1.fa.gz differ diff --git a/example/SX2.fa.gz b/example/SX2.hap1.fa.gz similarity index 100% rename from example/SX2.fa.gz rename to example/SX2.hap1.fa.gz diff --git a/example/config_CICD.yaml b/example/config_CICD.yaml index 775a7fc6d3036b53d11b000ba4ecc1deea96c9e2..aeebbf68068d3e005c17201176b49eb1b7957cc4 100644 --- 
a/example/config_CICD.yaml +++ b/example/config_CICD.yaml @@ -1,6 +1,7 @@ name: '03SC_CICD' -reference: 'R64.fa.gz' +reference: 'R64.hap1.fa.gz' app.path: 'appimgs/' pggb.params: '-X --skip-viz' odgi.1Dviz.params: '-x 500 -b' -odgi.pcov.params: '-x 500 -O' \ No newline at end of file +odgi.pcov.params: '-x 500 -O' +get_PAV: 'False' diff --git a/scripts/analyzeWorflowData.py b/scripts/analyzeWorflowData.py new file mode 100644 index 0000000000000000000000000000000000000000..de0e25985707db454bd9d0f628b5cb66007a2a01 --- /dev/null +++ b/scripts/analyzeWorflowData.py @@ -0,0 +1,113 @@ +""" +Workflow statistics analysis script for Pan1c workflow + +Given one or more workflow statistics TSV files produced by the Pan1c workflow, +the script concatenates them and plots resource usage (e.g. peak memory) +against input sequence length statistics. + +@author: alexis.mergez@inrae.fr +@version: 1.0 +""" + +from Bio import SeqIO +import numpy as np +import pandas as pd +import argparse +import gzip +import os +import seaborn as sns +import matplotlib.pyplot as plt + +## Arguments +arg_parser = argparse.ArgumentParser(description='Pan1c workflow statistics analysis') +arg_parser.add_argument( + "--statsfiles", + "-i", + nargs="+", + dest = "statsfiles", + required = True, + help = "Workflow statistics file(s)" + ) +arg_parser.add_argument( + "--output", + "-o", + dest = "outdir", + required = True, + help = "Output directory" + ) +arg_parser.add_argument( + "--debug", + "-d", + action="store_true", + dest = "debug", + help = "Show debug" + ) +args = arg_parser.parse_args() + +## Toolbox +def getRegEquation(x, y): + (slope, intercept), ssr, _1, _2, _3 = np.polyfit(x, y, 1, full = True) + ymean = np.mean(y) + sst = np.sum((y - ymean)**2) + r2 = (1 - (ssr/sst))[0] + return f'y = {slope:.2f}x + {intercept:.2f} (R2 : {r2:.2f})' + +## Main script +dfList = [] +for file in args.statsfiles: + dfList.append( + pd.read_csv( + file, + sep='\t', + ) + ) + +df = pd.concat(dfList, ignore_index = True) + +if args.debug: print(df) +df.to_csv("Final.tsv", sep='\t', index=False) + +# Memory versus mean base count +sns.regplot(x=df["input.mean.length"], y=df.mem, line_kws={"color":"r","alpha":0.7,"lw":5}) +equation = getRegEquation(x=df["input.mean.length"], y=df.mem) +plt.xlabel('Mean input sequences length (#bases)') +plt.ylabel('Peak memory usage (GB)') +plt.annotate(equation, xy=(0.05, 0.95), xycoords='axes fraction', fontsize=12, color='red') +plt.savefig("MemoryVSMeanSeqLength.png") +plt.close() + +# Memory versus base count +sns.regplot(x=df["input.total.length"], y=df.mem, line_kws={"color":"r","alpha":0.7,"lw":5}) +equation = getRegEquation(x=df["input.total.length"], y=df.mem) +plt.xlabel('Total input sequences length (#bases)') +plt.ylabel('Peak memory usage (GB)') +plt.annotate(equation, xy=(0.05, 0.95), xycoords='axes fraction', fontsize=12, color='red') +plt.savefig("MemoryVSSeqLength.png") +plt.close() + +# PCA (experimental, currently disabled) +if False: + excluded_columns = ["pangenome.name", "chrom.id", "time"] + columns = [ + col + for col in df.columns.tolist() + if col not in excluded_columns + ] + crdf = df.copy() + crdf[columns] = (df[columns]-df[columns].mean())/df[columns].std() + + if args.debug: print(crdf) + + from sklearn.decomposition import PCA + pca = PCA(n_components=3) + newX = pca.fit_transform(crdf[columns]) + + pcaDF = pd.DataFrame(newX) + pcaDF.columns = ["D1", "D2", "D3"] + print(pcaDF) + + finalDF = pd.concat([df[excluded_columns], pcaDF], axis = 1) + print(finalDF) + + fig = sns.lmplot(x="D1", 
y="D2", hue="chrom.id", data = finalDF, fit_reg=False) + plt.savefig("Test.png") \ No newline at end of file diff --git a/scripts/chrInputStats.py b/scripts/chrInputStats.py new file mode 100644 index 0000000000000000000000000000000000000000..9d23c75864aa9939babbc37aa9cfacf15fd04eac --- /dev/null +++ b/scripts/chrInputStats.py @@ -0,0 +1,115 @@ +""" +Input statistics script for Pan1c workflow + +Given a fasta input made for pggb, computes stats regarding the complexity of the sequences + +@author: alexis.mergez@inrae.fr +@version: 1.0 +""" + +from Bio import SeqIO +import numpy as np +import pandas as pd +import argparse +import gzip +import os + +## Arguments +arg_parser = argparse.ArgumentParser(description='Pan1c pggb input statistics') +arg_parser.add_argument( + "--fasta", + "-f", + nargs="+", + dest = "fastafiles", + required = True, + help = "Fasta file(s)" + ) +arg_parser.add_argument( + "--output", + "-o", + dest = "output", + required = True, + help = "Output TSV" + ) +arg_parser.add_argument( + "--panname", + "-p", + dest = "panname", + required = True, + help = "Pangenome name" + ) +arg_parser.add_argument( + "--debug", + "-d", + action="store_true", + dest = "debug", + help = "Show debug" + ) +args = arg_parser.parse_args() + +## Toolbox + +## Main script +seqDict = {} + +# Parsing fasta files +for filename in args.fastafiles: + + chrName = os.path.basename(filename).split(".fa.gz")[0] + + # Reading .fa.gz file and adding records to seqDict + with gzip.open(filename, "rt") as handle: + sequences = SeqIO.parse(handle, "fasta") + + for record in sequences: + seqDict[(chrName, record.id)] = record.seq + +chromList = np.unique([x for x, y in seqDict.keys()]) + +aggregatedStats = {} + +for chrom in chromList: + _stats = { + "input.#N": 0, + "input.total.length": 0 + } + _lengths, _Nper = [], [] + for (chrName, seqid), seq in seqDict.items(): + if chrName == chrom: + # Counting number of Ns + _stats["input.#N"] += seq.count("N") + # Counting total bases + _stats["input.total.length"] += len(seq) + # Saving length and N percentage + _lengths.append(len(seq)) + _Nper.append((seq.count("N")/len(seq))) + + _stats["input.mean.N%"] = np.mean(_Nper)*100 + _stats["input.mean.length"] = np.mean(_lengths) + _stats["input.#sequences"] = len(_lengths) + + #Computing L50 + _lengths = sorted(_lengths, reverse = True) + halfTotal = _stats["input.total.length"]/2 + cumulativeLength, L50 = 0, 0 + for length in _lengths: + cumulativeLength += length + L50 += 1 + + if cumulativeLength >= halfTotal: + break + + _stats["input.L50"] = L50 + + aggregatedStats[(args.panname,chrom)] = _stats + +if args.debug: print(aggregatedStats) + +df = pd.DataFrame.from_dict(aggregatedStats, orient='index') +df.reset_index(inplace=True) +df.rename(columns={df.columns[0]: 'pangenome.name', df.columns[1]: 'chrom.id'}, inplace = True) +df.to_csv( + args.output, + sep='\t', + index = False +) \ No newline at end of file diff --git a/scripts/getPanacusHG.sh b/scripts/getPanacusHG.sh new file mode 100755 index 0000000000000000000000000000000000000000..25ba26bcee0a520775b18a0f4b928b9be824c647 --- /dev/null +++ b/scripts/getPanacusHG.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Get a GFA and returns a Panacus report + +# Initializing arguments +gfa="" # GFA path +refname="" # Reference haplotype +appdir="" # Directory containing apptainer images +threads="" +chrdir="" # Chromosome directory (directory used by pggb to store step files like .paf, etc...) 
+output="" # Output panacus figure + +## Getting arguments +while getopts "g:r:a:t:d:o:" option; do + case "$option" in + g) gfa="$OPTARG";; + r) refname="$OPTARG";; + a) appdir="$OPTARG";; + t) threads="$OPTARG";; + d) chrdir="$OPTARG";; + o) output="$OPTARG";; + \?) echo "Usage: $0 [-g gfa] [-r refname] [-a appdir] [-t threads] [-d chrdir] [-o output]" >&2 + exit 1;; + esac +done + +# Getting chromosome name +chrname=$(basename ${gfa} .gfa) +ref=$(echo $refname | sed 's/.hap/#/') + + +# Getting paths in chromosome graph +echo "[getPanacusHG::odgi::paths] Running on ${gfa}" +apptainer run --app odgi "${appdir}/PanGeTools.sif" paths -i ${gfa} -L | grep -ve "$ref" > ${chrdir}/$chrname.paths.noref.txt + +# Running Panacus +echo "[getPanacusHG::panacus] Running on ${gfa}" +apptainer run --app panacus $appdir/PanGeTools.sif histgrowth \ + -t $threads -l 1,2,1,1,1 -q 0,0,1,0.5,0.1 -S -a -s ${chrdir}/$chrname.paths.noref.txt -c all -o html $gfa > ${output} + + diff --git a/scripts/ragtagChromInfer.sh b/scripts/ragtagChromInfer.sh index 7721a181f79d19ec6a408817956613b6e8619c38..c68f0d1fd8f05f9b7471d360f2f42b91cba229e7 100755 --- a/scripts/ragtagChromInfer.sh +++ b/scripts/ragtagChromInfer.sh @@ -24,21 +24,24 @@ while getopts "d:a:t:r:q:o:" option; do done ## Main script -hap=$(basename $inputquery .fa.gz) -ref=$(basename $inputref .fa.gz) +sample=$(basename $inputquery .fa.gz) +hapID=$(echo $sample | sed 's/.hap/#/') +fullref=$(basename $inputref .fa.gz) +ref=$(echo $fullref | sed 's/.hap/#/') + mkdir -p $tmpdir # Running ragtag scaffold -apptainer run $appdir/pytools.sif ragtag.py scaffold \ +apptainer run $appdir/pan1c-env.sif ragtag.py scaffold \ -t $threads -o $tmpdir $inputref $inputquery # Renaming sequence according to naming scheme grep ">${ref}#chr*" -A1 $tmpdir/ragtag.scaffold.fasta | \ - sed "s/${ref}#chr\([^_]*\)_RagTag/${hap}#chr\1/g" > $tmpdir/${hap}.ragtagged.fa + sed "s/${ref}#chr\([^_]*\)_RagTag/${hapID}#chr\1/g" > $tmpdir/${sample}.ragtagged.fa # Compressing fasta apptainer run --app bgzip $appdir/PanGeTools.sif \ - -@ $threads $tmpdir/${hap}.ragtagged.fa + -@ $threads $tmpdir/${sample}.ragtagged.fa # Moving fa.gz to output dir -mv $tmpdir/${hap}.ragtagged.fa.gz $output +mv $tmpdir/${sample}.ragtagged.fa.gz $output diff --git a/scripts/sr_mapping2graph.sh b/scripts/sr_mapping2graph.sh new file mode 100755 index 0000000000000000000000000000000000000000..e203385b0731a6f5582372150cc1add48ab4787f --- /dev/null +++ b/scripts/sr_mapping2graph.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Map given shrot reads to the pangenome graph using vg giraffe. See wiki for detailled explanation. + +# Initializing arguments +shortreads="" # Fastq file to map against the graph +appdir="" # Directory containing apptainer images +threads="" # Threads count +graph="" # Pangenome graph +output="" # Output gam + +## Getting arguments +while getopts "a:t:g:f:o:" option; do + case "$option" in + a) appdir="$OPTARG";; + t) threads="$OPTARG";; + g) graph="$OPTARG";; + f) shortreads="$OPTARG";; + o) output="$OPTARG";; + \?) 
echo "Usage: $0 [-a apptainer dir] [-t threads] [-g graph] [-f fastq] [-o output gam]" >&2 + exit 1;; + esac +done + +## Main script +apptainer run --app vg $appdir/PanGeTools.sif \ + autoindex -t $threads -g $graph --workflow giraffe -p $(basename $graph) + +apptainer run --app vg $appdir/PanGeTools.sif \ + giraffe -Z ${graph}.giraffe.gbz -m ${graph}.min -d ${graph}.dist -f $shortreads -t $threads -p > $output + +apptainer run --app vg $appdir/PanGeTools.sif \ + stats -a $output diff --git a/scripts/workflowStats.py b/scripts/workflowStats.py index 8426780be5756c324948afd22fa3b847cb299deb..fa9e4f317fb5ae12be9830e3ff9ad1113cfc0fac 100644 --- a/scripts/workflowStats.py +++ b/scripts/workflowStats.py @@ -29,12 +29,12 @@ arg_parser.add_argument( help = "Tarballs containing time logs" ) arg_parser.add_argument( - "--chrinputlen", - "-l", + "--chrInputStats", + "-c", nargs = "+", - dest = "chrinputlenfiles", + dest = "chrInputStatsFiles", required = True, - help = "One or more TSV containing input sequence length (generated by Pan1c workflow)" + help = "One or more TSV containing input chrInput stats (generated by Pan1c workflow)" ) arg_parser.add_argument( "--graphstats", @@ -118,23 +118,25 @@ for tarball in args.tarballs: ## Parsing chromosome input file size (in #bases). # Iterating over input length files -for chrInputLength in args.chrinputlenfiles: +for chrInputStats in args.chrInputStatsFiles: - panname = os.path.basename(chrInputLength).split(".")[2] # Works if name is pan1c.pggb.<panname>.chrInputLen.tsv + panname = os.path.basename(chrInputStats).split(".")[2] # Works if name is pan1c.pggb.<panname>.chrInput.stats.tsv # Reading chrInputLen to get the number of bases used to construct chromosomes - chrSeqLength = pd.read_csv( - chrInputLength, - sep="\t", - header=None, - index_col=0 - ).to_dict()[1] - - if args.debug: print("[timeStats::debug]", chrSeqLength) + chrInputStatsdf = pd.read_csv( + chrInputStats, + sep="\t" + ) - # Adding base counts to aggregatedData - for chromName, chromLength in chrSeqLength.items(): - aggregatedData[(panname,chromName)]["nbases"] = chromLength + # Iterating over dataframe's rows + for index, row in chrInputStatsdf.iterrows(): + colnames = [ + col + for col in chrInputStatsdf.columns.tolist() + if col not in ["pangenome.name", "chrom.id"] + ] + for column in colnames: + aggregatedData[(row["pangenome.name"], row["chrom.id"])][column] = row[column] ## Parsing graph statistics generated by odgi stats graphColDict = { @@ -170,7 +172,7 @@ df.rename(columns=graphColDict, inplace = True) df.time = pd.to_timedelta(df.time) df.mem = df.mem.astype(float) df.cpu = df.cpu.astype(int) -df.nbases = df.nbases.astype(int) +df["input.total.length"] = df["input.total.length"].astype(int) if args.debug: print("[timeStats::debug]", df.dtypes) @@ -179,8 +181,8 @@ df.to_csv(args.output, sep='\t', index=False) # Creating some figures # Time versus base count -sns.regplot(x=df.nbases, y=df.time.dt.total_seconds(), line_kws={"color":"r","alpha":0.7,"lw":5}) -equation = getRegEquation(x=df.nbases, y=df.time.dt.total_seconds()) +sns.regplot(x=df["input.total.length"], y=df.time.dt.total_seconds(), line_kws={"color":"r","alpha":0.7,"lw":5}) +equation = getRegEquation(x=df["input.total.length"], y=df.time.dt.total_seconds()) plt.xlabel('Total input sequences length (#bases)') plt.ylabel('Graph creation time (s)') plt.annotate(equation, xy=(0.05, 0.95), xycoords='axes fraction', fontsize=12, color='red') @@ -188,8 +190,8 @@ 
plt.savefig(os.path.join(args.figdir,"TimeVSSeqLength.png")) plt.close() # Memory versus base count -sns.regplot(x=df.nbases, y=df.mem, line_kws={"color":"r","alpha":0.7,"lw":5}) -equation = getRegEquation(x=df.nbases, y=df.mem) +sns.regplot(x=df["input.total.length"], y=df.mem, line_kws={"color":"r","alpha":0.7,"lw":5}) +equation = getRegEquation(x=df["input.total.length"], y=df.mem) plt.xlabel('Total input sequences length (#bases)') plt.ylabel('Peak memory usage (GB)') plt.annotate(equation, xy=(0.05, 0.95), xycoords='axes fraction', fontsize=12, color='red') @@ -197,8 +199,8 @@ plt.savefig(os.path.join(args.figdir,"MemoryVSSeqLength.png")) plt.close() # CPU versus base count -sns.regplot(x=df.nbases, y=df.cpu, line_kws={"color":"r","alpha":0.7,"lw":5}) -equation = getRegEquation(x=df.nbases, y=df.cpu) +sns.regplot(x=df["input.total.length"], y=df.cpu, line_kws={"color":"r","alpha":0.7,"lw":5}) +equation = getRegEquation(x=df["input.total.length"], y=df.cpu) plt.xlabel('Total input sequences length (#bases)') plt.ylabel('CPU usage (%)') plt.annotate(equation, xy=(0.05, 0.95), xycoords='axes fraction', fontsize=12, color='red')
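For reviewers who want to sanity-check the regression annotation used in these figures, here is a minimal, self-contained sketch of the same `np.polyfit`-based helper on toy data (toy values only, not workflow output):

```python
import numpy as np

def getRegEquation(x, y):
    # Same approach as in scripts/workflowStats.py: least-squares line,
    # with R2 derived from the residual sum of squares returned by polyfit.
    (slope, intercept), ssr, _rank, _sv, _rcond = np.polyfit(x, y, 1, full = True)
    ymean = np.mean(y)
    sst = np.sum((y - ymean)**2)
    r2 = (1 - (ssr/sst))[0]
    return f'y = {slope:.2f}x + {intercept:.2f} (R2 : {r2:.2f})'

# Slightly noisy line so the residual array returned by polyfit is non-empty
x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = 2.0 * x + 1.0 + np.array([0.05, -0.04, 0.03, -0.02, 0.01])
print(getRegEquation(x, y))  # expected: slope ~2.00, intercept ~1.00, R2 ~1.00
```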