//! Implementation of Duval's algorithm to compute the standard factorization of a string
//! into Lyndon words. A Lyndon word is a string that is strictly smaller
//! (lexicographically) than any of its nontrivial suffixes. This implementation operates
//! in linear time and space.
//!
//! Source: https://en.wikipedia.org/wiki/Lyndon_word

6+ /// Performs Duval's algorithm to factorize a given string into its Lyndon words.
7+ ///
8+ /// # Arguments
9+ ///
10+ /// * `s` - A slice of characters representing the input string.
11+ ///
12+ /// # Returns
13+ ///
14+ /// A vector of strings, where each string is a Lyndon word, representing the factorization
15+ /// of the input string.
16+ ///
17+ /// # Time Complexity
18+ ///
19+ /// The algorithm runs in O(n) time, where `n` is the length of the input string.
20+ pub fn duval_algorithm ( s : & str ) -> Vec < String > {
21+ factorize_duval ( & s. chars ( ) . collect :: < Vec < char > > ( ) )
22+ }
23+
/// Computes the Lyndon factorization of a character slice with Duval's algorithm.
///
/// The slice is consumed from left to right. Each outer iteration finds the longest
/// prefix of the remaining input that is a Lyndon word repeated one or more times
/// (optionally followed by a proper prefix of that word), emits every full copy of
/// the word, and restarts right after the last emitted copy.
///
/// # Arguments
///
/// * `s` - The characters of the input text.
///
/// # Returns
///
/// A vector of strings, each representing a Lyndon word in the factorization, in
/// input order. An empty slice produces an empty vector.
fn factorize_duval(s: &[char]) -> Vec<String> {
    let mut start = 0;
    let mut factors: Vec<String> = Vec::new();

    while start < s.len() {
        // `end` scans ahead of the current candidate; `repeat` trails it and points at
        // the character `s[end]` has to equal for the current word to keep repeating.
        let mut end = start + 1;
        let mut repeat = start;

        while end < s.len() && s[repeat] <= s[end] {
            if s[repeat] < s[end] {
                // A strictly larger character extends the candidate into one longer
                // Lyndon word, so matching starts over from its first character.
                repeat = start;
            } else {
                // Equal characters: the candidate keeps repeating itself.
                repeat += 1;
            }
            end += 1;
        }

        // The distance between the two cursors is the length of the repeated word.
        // Neither cursor moves below, so it is computed once.
        let word_len = end - repeat;
        while start <= repeat {
            factors.push(s[start..start + word_len].iter().collect::<String>());
            start += word_len;
        }
    }

    factors
}

#[cfg(test)]
mod test {
    // NOTE(review): this import is not visible in the captured diff hunk; it is restored
    // here because the cases below call `duval_algorithm` unqualified — confirm against
    // the real file.
    use super::*;

    /// Generates one `#[test]` function per `name: (input, expected)` entry, asserting
    /// that `duval_algorithm(input)` equals `expected`.
    macro_rules! test_duval_algorithm {
        ($($name:ident: $inputs:expr,)*) => {
            $(
                #[test]
                fn $name() {
                    let (text, expected) = $inputs;
                    assert_eq!(duval_algorithm(text), expected);
                }
            )*
        }
    }

    test_duval_algorithm! {
        repeating_with_suffix: ("abcdabcdababc", ["abcd", "abcd", "ababc"]),
        single_repeating_char: ("aaa", ["a", "a", "a"]),
        single: ("ababb", ["ababb"]),
        unicode: ("അഅഅ", ["അ", "അ", "അ"]),
        empty_string: ("", Vec::<String>::new()),
        single_char: ("x", ["x"]),
        palindrome: ("racecar", ["r", "acecar"]),
        long_repeating: ("aaaaaa", ["a", "a", "a", "a", "a", "a"]),
        mixed_repeating: ("ababcbabc", ["ababcbabc"]),
        non_repeating_sorted: ("abcdefg", ["abcdefg"]),
        alternating_increasing: ("abababab", ["ab", "ab", "ab", "ab"]),
        long_repeating_lyndon: ("abcabcabcabc", ["abc", "abc", "abc", "abc"]),
        decreasing_order: (
            "zyxwvutsrqponm",
            ["z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n", "m"]
        ),
        // Digits and punctuation sort below lowercase letters, which is what splits
        // the next two inputs where they do.
        alphanumeric_mixed: ("a1b2c3a1", ["a", "1b2c3a", "1"]),
        special_characters: ("a@b#c$d", ["a", "@b", "#c$d"]),
        unicode_complex: ("αβγδ", ["αβγδ"]),
        long_string_performance: (&"a".repeat(1_000_000), vec!["a"; 1_000_000]),
        palindrome_repeating_prefix: ("abccba", ["abccb", "a"]),
        interrupted_lyndon: ("abcxabc", ["abcx", "abc"]),
    }
}