FSharpBio


FSharpBio: Documentation

FSharpBio aims to be a user-friendly library for bioinformatics written in F#. It contains the basic data structures for common biological objects, such as amino acids and nucleotides, based on chemical formulas and chemical elements. It also facilitates some basic machine learning tasks as well as statistical analysis of biological data sets.

This example demonstrates how to use functions defined in the FSharpBio library.

1: 
2: 
3: 
4: 
5: 
#r "FSharpBio.dll"
#r "FSharpBio.IO.dll"

open FSharpBio
open FSharpBio.IO

Read a fastA file

The converter reads a character and returns it as either an amino acid or a nucleotide, depending on which OptionConverter function is chosen. For a protein fastA file use OptionConverter.charToOptionStandardAminoAcid, and for a gene fastA file use OptionConverter.charToOptionStandardNucleotid.

1: 
2: 
3: 
4: 
5: 
6: 
7: 
8: 
// Character-to-amino-acid converter (returns an option) used while parsing the fastA file
let converter = BioSequences.OptionConverter.charToOptionStandardAminoAcid

// Location of the .fastA file, relative to the directory of this script
let fastaPath = __SOURCE_DIRECTORY__ + "/data/chlamy3proteins.fasta"
// Parse the .fastA file with the converter and materialize the items into an array
let chlamy3proteins =
    fastaPath
    |> FastA.fromFileWithOptional converter
    |> Seq.toArray

Digest proteins

Digests the proteins from the .fastA file into peptides. Trypsin is used as the protease.

1: 
2: 
3: 
4: 
// All peptides obtained by digesting every protein sequence with trypsin,
// flattened into a single sequence
let trypsinPeptides =
    chlamy3proteins
    |> Seq.collect (fun protein -> Digestion.digest Digestion.trypsin protein.Sequence)

Mass calculation

Calculates the monoisotopic masses of the peptides produced by the previous digestion.

1: 
2: 
3: 
4: 
5: 
6: 
// Monoisotopic mass of every tryptic peptide: the peptide's formula plus one H2O
// NOTE(review): the added water presumably accounts for the peptide termini - confirm
let peptideMasses =
    trypsinPeptides
    |> Seq.map (fun residues ->
        let hydrated = Formula.add (BioSequences.toFormula residues) Formula.Table.H2O
        Formula.monoisoMass hydrated)

Mass histogram

Shows the distribution of the monoisotopic peptide masses.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
#r "FSharp.Charting.dll"
#r "FSharpStats.dll"

open FSharpStats
open FSharp.Charting

// Bandwidth passed to the histogram constructor
let bw = 0.7
// Histogram of the peptide masses as a map from float keys to integer counts
let histo = peptideMasses |> Descriptive.Histogram.create bw

// Column chart of the histogram, with the x-axis restricted to 300 .. 8000
Chart.Column (Descriptive.Histogram.getZip histo)
|> Chart.WithXAxis(Min=300., Max=8000.)

Chart

1: 
2: 
//// Write
//let _ = chlamy3proteins |> FastA.write AminoAcids.symbol fastaPath
namespace FSharpBio
namespace FSharpBio.IO
val converter : (char -> AminoAcids.AminoAcid option)

Full name: FSharpBio.IO.converter
module BioSequences

from FSharpBio
module OptionConverter

from FSharpBio.BioSequences
val charToOptionStandardAminoAcid : aac:char -> AminoAcids.AminoAcid option

Full name: FSharpBio.BioSequences.OptionConverter.charToOptionStandardAminoAcid
val fastaPath : string

Full name: FSharpBio.IO.fastaPath
val chlamy3proteins : FastA.FastaItem<seq<AminoAcids.AminoAcid>> []

Full name: FSharpBio.IO.chlamy3proteins
module FastA

from FSharpBio.IO
val fromFileWithOptional : converter:(char -> Option<'a>) -> filePath:string -> seq<FastA.FastaItem<seq<'a>>>

Full name: FSharpBio.IO.FastA.fromFileWithOptional
module Seq

from Microsoft.FSharp.Collections
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val trypsinPeptides : seq<AminoAcids.AminoAcid list>

Full name: FSharpBio.IO.trypsinPeptides
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val fastaItem : FastA.FastaItem<seq<AminoAcids.AminoAcid>>
FastA.FastaItem.Sequence: seq<AminoAcids.AminoAcid>
val collect : mapping:('T -> #seq<'U>) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.collect
val aas : seq<AminoAcids.AminoAcid>
module Digestion

from FSharpBio
val digest : protease:Digestion.Protease -> aas:seq<AminoAcids.AminoAcid> -> seq<AminoAcids.AminoAcid list>

Full name: FSharpBio.Digestion.digest
val trypsin : Digestion.Protease

Full name: FSharpBio.Digestion.trypsin
val peptideMasses : seq<float>

Full name: FSharpBio.IO.peptideMasses
val peptide : AminoAcids.AminoAcid list
val fPEptide : Map<Elements.Element,float>
val toFormula : bs:seq<#IBioItem> -> Map<Elements.Element,float>

Full name: FSharpBio.BioSequences.toFormula
module Formula

from FSharpBio
val add : f1:Formula.Formula -> f2:Formula.Formula -> Map<Elements.Element,float>

Full name: FSharpBio.Formula.add
module Table

from FSharpBio.Formula
val H2O : Formula.Formula

Full name: FSharpBio.Formula.Table.H2O
val monoisoMass : f:Formula.Formula -> float

Full name: FSharpBio.Formula.monoisoMass
namespace FSharpStats
namespace FSharp
namespace FSharp.Charting
val bw : float

Full name: FSharpBio.IO.bw
val histo : Map<float,int>

Full name: FSharpBio.IO.histo
namespace FSharpStats.Descriptive
module Histogram

from FSharpStats.Descriptive
val create : bandwidth:float -> data:seq<float> -> Map<float,int>

Full name: FSharpStats.Descriptive.Histogram.create
type Chart =
  static member Area : data:seq<#value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string -> GenericChart
  static member Area : data:seq<#key * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string -> GenericChart
  static member Bar : data:seq<#value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string -> GenericChart
  static member Bar : data:seq<#key * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string -> GenericChart
  static member BoxPlotFromData : data:seq<#key * #seq<'a2>> * ?Name:string * ?Title:string * ?Color:Color * ?XTitle:string * ?YTitle:string * ?Percentile:int * ?ShowAverage:bool * ?ShowMedian:bool * ?ShowUnusualValues:bool * ?WhiskerPercentile:int -> GenericChart (requires 'a2 :> value)
  static member BoxPlotFromStatistics : data:seq<#key * #value * #value * #value * #value * #value * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string * ?Percentile:int * ?ShowAverage:bool * ?ShowMedian:bool * ?ShowUnusualValues:bool * ?WhiskerPercentile:int -> GenericChart
  static member Bubble : data:seq<#value * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string * ?BubbleMaxSize:int * ?BubbleMinSize:int * ?BubbleScaleMax:float * ?BubbleScaleMin:float * ?UseSizeForLabel:bool -> GenericChart
  static member Bubble : data:seq<#key * #value * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string * ?BubbleMaxSize:int * ?BubbleMinSize:int * ?BubbleScaleMax:float * ?BubbleScaleMin:float * ?UseSizeForLabel:bool -> GenericChart
  static member Candlestick : data:seq<#value * #value * #value * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string -> CandlestickChart
  static member Candlestick : data:seq<#key * #value * #value * #value * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:Color * ?XTitle:string * ?YTitle:string -> CandlestickChart
  ...

Full name: FSharp.Charting.Chart
static member Chart.Column : data:seq<#value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:System.Drawing.Color * ?XTitle:string * ?YTitle:string -> ChartTypes.GenericChart
static member Chart.Column : data:seq<#key * #value> * ?Name:string * ?Title:string * ?Labels:#seq<string> * ?Color:System.Drawing.Color * ?XTitle:string * ?YTitle:string -> ChartTypes.GenericChart
val getZip : hist:Map<float,int> -> seq<float * int>

Full name: FSharpStats.Descriptive.Histogram.getZip
static member Chart.WithXAxis : ?Enabled:bool * ?Title:string * ?Max:float * ?Min:float * ?Log:bool * ?ArrowStyle:ChartTypes.AxisArrowStyle * ?LabelStyle:ChartTypes.LabelStyle * ?MajorGrid:ChartTypes.Grid * ?MinorGrid:ChartTypes.Grid * ?MajorTickMark:ChartTypes.TickMark * ?MinorTickMark:ChartTypes.TickMark * ?TitleAlignment:System.Drawing.StringAlignment * ?TitleFontName:string * ?TitleFontSize:float * ?TitleFontStyle:System.Drawing.FontStyle * ?TitleColor:System.Drawing.Color * ?ToolTip:string -> ('a0 -> 'a0) (requires 'a0 :> ChartTypes.GenericChart)
Fork me on GitHub