// dissolve-python 0.3.0
//
// A tool to dissolve deprecated calls in Python codebases
// Documentation
// Copyright (C) 2024 Jelmer Vernooij <jelmer@samba.org>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Optimizations and refactoring helpers to reduce allocations

use std::borrow::Cow;
use std::rc::Rc;
use std::sync::Arc;

/// String storage that is either borrowed or owned.
///
/// Borrowing avoids a clone whenever the string does not need to be modified.
pub type CowStr<'a> = Cow<'a, str>;

/// Immutable, reference-counted string.
///
/// Cloning a `SharedString` only bumps the reference count; the text itself
/// is not copied.
pub type SharedString = Arc<str>;

/// Cheap conversions from string types into [`CowStr`] and [`SharedString`].
pub trait StringOptimizations {
    /// Borrows the string as a [`CowStr`] without allocating.
    ///
    /// The returned value borrows from `self`; the `'_` in the signature makes
    /// that borrow visible to readers instead of hiding it behind the alias.
    fn as_cow(&self) -> CowStr<'_>;

    /// Copies the string into a newly allocated [`SharedString`].
    ///
    /// The conversion itself allocates once; every later clone of the returned
    /// handle is a reference-count increment.
    fn to_shared(&self) -> SharedString;
}

impl StringOptimizations for String {
    fn as_cow(&self) -> CowStr<'_> {
        Cow::Borrowed(self.as_str())
    }

    fn to_shared(&self) -> SharedString {
        Arc::from(self.as_str())
    }
}

impl StringOptimizations for &str {
    fn as_cow(&self) -> CowStr<'_> {
        // `self` is `&&str`; dereference once to hand out the inner slice.
        Cow::Borrowed(*self)
    }

    fn to_shared(&self) -> SharedString {
        Arc::from(*self)
    }
}

/// Interning cache that hands out shared `Rc<str>` handles for equal strings.
///
/// Each distinct string is stored exactly once: the same `Rc<str>` allocation
/// serves both as the lookup key inside the set and as the handle returned to
/// callers, so interning a new string costs a single allocation.
#[derive(Debug, Default)]
pub struct StringCache {
    cache: std::collections::HashSet<Rc<str>>,
}

impl StringCache {
    /// Creates an empty cache.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the cached handle for `s`, interning `s` first if necessary.
    ///
    /// A hit only bumps a reference count. A miss allocates one `Rc<str>` and
    /// stores a second handle to that same allocation in the cache.
    pub fn get_or_insert(&mut self, s: &str) -> Rc<str> {
        // `Rc<str>: Borrow<str>`, so the set can be probed with a plain `&str`
        // and no temporary key has to be built on the hit path.
        if let Some(existing) = self.cache.get(s) {
            return Rc::clone(existing);
        }

        let interned: Rc<str> = Rc::from(s);
        self.cache.insert(Rc::clone(&interned));
        interned
    }

    /// Returns the cached handle for `s`, or `None` if `s` was never interned.
    pub fn get(&self, s: &str) -> Option<Rc<str>> {
        self.cache.get(s).map(Rc::clone)
    }
}

/// Parameter map that stores borrowed entries without copying them.
///
/// Entries whose key and value already live for `'a` are kept as plain
/// references; entries that had to be built at runtime are stored as owned
/// `String`s. A key lives in at most one of the two internal maps, so the most
/// recent insert for a key always wins and iteration never yields a key twice.
#[derive(Debug, Default)]
pub struct OptimizedParamMap<'a> {
    data: std::collections::HashMap<&'a str, &'a str>,
    owned: std::collections::HashMap<String, String>,
}

impl<'a> OptimizedParamMap<'a> {
    /// Creates an empty map.
    pub fn new() -> Self {
        Self::default()
    }

    /// Inserts a borrowed key-value pair, replacing any previous entry for `key`.
    pub fn insert_borrowed(&mut self, key: &'a str, value: &'a str) {
        // Evict a possible owned entry so it cannot shadow or duplicate this one.
        self.owned.remove(key);
        self.data.insert(key, value);
    }

    /// Inserts an owned key-value pair, replacing any previous entry for `key`.
    ///
    /// Use this only when the strings cannot be borrowed for `'a`.
    pub fn insert_owned(&mut self, key: String, value: String) {
        // Evict a possible borrowed entry; otherwise `get` would keep returning
        // the older borrowed value because the borrowed map is consulted first.
        self.data.remove(key.as_str());
        self.owned.insert(key, value);
    }

    /// Returns the value stored for `key`, looking at borrowed entries first
    /// and owned entries second.
    pub fn get(&self, key: &str) -> Option<&str> {
        self.data
            .get(key)
            .copied()
            .or_else(|| self.owned.get(key).map(String::as_str))
    }

    /// Returns `true` if `key` is present in either internal map.
    pub fn contains_key(&self, key: &str) -> bool {
        self.data.contains_key(key) || self.owned.contains_key(key)
    }

    /// Iterates over all key-value pairs.
    ///
    /// Borrowed entries are yielded before owned ones; within each group the
    /// order is whatever the underlying `HashMap` produces.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
        self.data
            .iter()
            .map(|(&k, &v)| (k, v))
            .chain(self.owned.iter().map(|(k, v)| (k.as_str(), v.as_str())))
    }
}

/// Helper to avoid cloning in common patterns
pub mod clone_reduction {
    use super::*;

    /// Returns `s` as a `Cow`, copying it only when the caller asks for an
    /// owned, modifiable value.
    ///
    /// With `needs_modification == false` the input is borrowed as-is. With
    /// `true` the text is copied into an owned `String`; no further
    /// transformation is applied to the copy.
    pub fn process_string<'a>(s: &'a str, needs_modification: bool) -> Cow<'a, str> {
        if !needs_modification {
            return Cow::Borrowed(s);
        }

        // The caller wants something it can mutate, so pay for the copy now.
        Cow::Owned(String::from(s))
    }

    /// Clones the element at `index` into a fresh `Rc`, or returns `None` when
    /// `index` is out of bounds.
    ///
    /// The element is cloned once here; the returned `Rc` can then be shared
    /// further without cloning the element again.
    pub fn share_element<T: Clone>(collection: &[T], index: usize) -> Option<Rc<T>> {
        let element = collection.get(index)?;
        Some(Rc::new(element.clone()))
    }

    /// Use string interning for frequently used strings
    pub fn intern_string(s: &str, cache: &mut StringCache) -> Rc<str> {
        cache.get_or_insert(s)
    }
}

/// String-to-string map whose keys and values are reference-counted.
///
/// Intended as a drop-in for `HashMap<String, String>` where the map is copied
/// often: entries are `Arc<str>`, so copying an entry bumps a counter instead
/// of duplicating the text.
pub struct OptimizedStringMap {
    // Both sides are `Arc<str>` so that cloning the map never copies string data.
    data: std::collections::HashMap<Arc<str>, Arc<str>>,
}

impl OptimizedStringMap {
    /// Creates an empty map.
    pub fn new() -> Self {
        let data = std::collections::HashMap::new();
        Self { data }
    }

    /// Stores `value` under `key`, replacing any previous value for that key.
    pub fn insert(&mut self, key: impl Into<Arc<str>>, value: impl Into<Arc<str>>) {
        let (shared_key, shared_value) = (key.into(), value.into());
        self.data.insert(shared_key, shared_value);
    }

    /// Returns the value stored under `key`, if any.
    pub fn get(&self, key: &str) -> Option<&str> {
        let stored = self.data.get(key)?;
        Some(&**stored)
    }

    /// Returns `true` if the map holds an entry for `key`.
    pub fn contains_key(&self, key: &str) -> bool {
        self.data.contains_key(key)
    }

    /// Iterates over all entries as `(key, value)` string slices.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
        self.data.iter().map(|(key, value)| (&**key, &**value))
    }

    /// Copies the map; the strings themselves are shared through their
    /// reference counts rather than duplicated.
    pub fn cheap_clone(&self) -> Self {
        let data = self.data.clone();
        Self { data }
    }
}

impl Default for OptimizedStringMap {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Interning equal text twice must hand back the very same allocation.
    #[test]
    fn test_string_cache() {
        let mut interner = StringCache::new();

        let first = interner.get_or_insert("hello");
        let second = interner.get_or_insert("hello");

        assert!(Rc::ptr_eq(&first, &second));
    }

    /// Borrowed and owned entries are both reachable through the same lookups.
    #[test]
    fn test_optimized_param_map() {
        let mut params = OptimizedParamMap::new();
        params.insert_borrowed("key1", "value1");
        params.insert_owned(String::from("key2"), String::from("value2"));

        for (key, expected) in [("key1", "value1"), ("key2", "value2")] {
            assert_eq!(params.get(key), Some(expected));
            assert!(params.contains_key(key));
        }
    }

    /// `process_string` borrows unless the caller asks for an owned copy.
    #[test]
    fn test_cow_usage() {
        let text = "hello";

        // Nothing to modify, so the input is handed back as a borrow.
        let untouched = clone_reduction::process_string(text, false);
        assert!(matches!(untouched, Cow::Borrowed(_)));

        // A modifiable value was requested, so an owned copy is produced.
        let copied = clone_reduction::process_string(text, true);
        assert!(matches!(copied, Cow::Owned(_)));
    }

    /// Inserted entries are readable from the map and from a clone of it.
    #[test]
    fn test_optimized_string_map() {
        let mut strings = OptimizedStringMap::new();
        strings.insert("key1", "value1");
        strings.insert("key2", "value2");

        assert_eq!(strings.get("key1"), Some("value1"));
        assert_eq!(strings.get("key2"), Some("value2"));

        // The clone shares the `Arc<str>` entries with the source map.
        let copy = strings.cheap_clone();
        assert_eq!(copy.get("key1"), Some("value1"));
    }
}