xdl-dataframe 0.1.1

DataFrame module for XDL: pandas/Spark-style data manipulation with support for CSV, TSV, Parquet, and Avro files
Documentation
; XDL Charting with DataFrame
; Demonstrates comprehensive charting capabilities with DataFrame data

;+
; charting_dataframe_demo
;
; End-to-end demo that:
;   1. synthesizes one year of daily sales (linear trend + annual sine
;      seasonality + Gaussian noise),
;   2. writes it to 'sales_data.csv' and loads it with XDLDATAFRAME_READ_CSV,
;   3. draws six chart windows (line, bar, stacked area, 2x2 scatter matrix,
;      histogram + box plot, correlation heatmap),
;   4. prints descriptive statistics of the 'sales' column, and
;   5. writes summary metrics to 'chart_summary.csv'.
;
; Takes no arguments. Side effects: opens graphics windows 0-5, creates the
; two CSV files in the current directory, and leaves !P.MULTI reset to 0.
;-
PRO charting_dataframe_demo
    PRINT, '=== XDL DataFrame + Charting Demo ==='
    PRINT, ''

    ; Generate time series data
    PRINT, '1. Generating Time Series Data'
    PRINT, '------------------------------'

    n_days = 365
    dates = INDGEN(n_days)

    ; Generate sales data with trend and seasonality
    trend = 1000 + dates * 5
    seasonal = 500 * SIN(2 * !PI * dates / 365)
    noise = RANDOMN(seed, n_days) * 100

    sales = trend + seasonal + noise
    sales = sales > 0  ; '>' is the maximum operator: clamp negatives to 0
    max_sales = MAX(sales)

    ; Generate categories
    products = ['Product A', 'Product B', 'Product C']
    n_products = N_ELEMENTS(products)

    PRINT, 'Generated ', n_days, ' days of sales data'
    PRINT, ''

    ; Create DataFrame
    PRINT, '2. Creating Sales DataFrame'
    PRINT, '--------------------------'

    lun = GET_LUN()
    OPENW, lun, 'sales_data.csv'
    PRINTF, lun, 'day,sales,product,quarter,month'

    FOR i = 0, n_days-1 DO BEGIN
        product = products[i MOD n_products]
        ; 90-day quarters / 30-day months do not divide 365 evenly: days
        ; 360-364 would otherwise land in "quarter 5" / "month 13".
        ; '<' is the minimum operator, so the tail days are folded into
        ; quarter 4 / month 12 and the bar chart sees exactly four groups.
        quarter = ((i / 90) + 1) < 4
        month = ((i / 30) + 1) < 12

        PRINTF, lun, FORMAT='(I3,",",F8.2,",",A,",",I1,",",I2)', $
                i, sales[i], product, quarter, month
    ENDFOR

    CLOSE, lun
    FREE_LUN, lun

    df = XDLDATAFRAME_READ_CSV('sales_data.csv')
    PRINT, 'DataFrame created: ', df->Shape()
    PRINT, ''

    ; Chart 1: Line Chart - Sales Trend
    PRINT, '3. Creating Line Chart - Sales Trend'
    PRINT, '-----------------------------------'

    WINDOW, 0, XSIZE=1000, YSIZE=600

    PLOT, dates, sales, $
          XTITLE='Day of Year', $
          YTITLE='Sales ($)', $
          TITLE='Daily Sales Trend', $
          THICK=2, $
          XRANGE=[0, n_days], $
          YRANGE=[0, max_sales*1.1]

    ; Add moving average
    window_size = 30
    moving_avg = SMOOTH(sales, window_size)
    OPLOT, dates, moving_avg, COLOR=!RED, THICK=3, LINESTYLE=2

    ; Legend text placed in data coordinates just under the top of the Y range
    XYOUTS, 10, max_sales*1.05, 'Raw Data', /DATA
    XYOUTS, 10, max_sales*1.00, 'Moving Average (30-day)', COLOR=!RED, /DATA

    PRINT, 'Line chart created with moving average'
    PRINT, ''

    ; Chart 2: Bar Chart - Quarterly Sales
    PRINT, '4. Creating Bar Chart - Quarterly Summary'
    PRINT, '----------------------------------------'

    WINDOW, 1, XSIZE=800, YSIZE=600

    ; Calculate quarterly totals using DataFrame
    quarterly_df = df->GroupBy(['quarter'])->Sum()
    quarters = [1, 2, 3, 4]
    n_quarters = N_ELEMENTS(quarters)

    ; Extract quarterly sales
    ; NOTE(review): assumes the grouped rows come back ordered by quarter
    ; (row i <-> quarters[i]) -- confirm against the GroupBy implementation.
    q_sales = FLTARR(n_quarters)
    FOR i = 0, n_quarters-1 DO BEGIN
        row = quarterly_df->Row(i)
        q_sales[i] = row['sales']
    ENDFOR
    max_q_sales = MAX(q_sales)

    ; /NODATA: draw axes only; the bars are painted below with POLYFILL
    PLOT, quarters, q_sales, PSYM=0, $
          XTITLE='Quarter', YTITLE='Total Sales ($)', $
          TITLE='Quarterly Sales Summary', $
          XRANGE=[0, 5], YRANGE=[0, max_q_sales*1.2], $
          /NODATA

    ; Draw bars
    colors = [!RED, !BLUE, !GREEN, !YELLOW]
    n_colors = N_ELEMENTS(colors)
    FOR i = 0, n_quarters-1 DO BEGIN
        ; Rectangle 0.6 wide centred on the quarter number
        POLYFILL, [quarters[i]-0.3, quarters[i]+0.3, $
                   quarters[i]+0.3, quarters[i]-0.3], $
                  [0, 0, q_sales[i], q_sales[i]], $
                  COLOR=colors[i MOD n_colors]

        ; Add value labels
        XYOUTS, quarters[i], q_sales[i] + max_q_sales*0.03, $
                STRING(q_sales[i], FORMAT='(I6)'), $
                ALIGNMENT=0.5, /DATA
    ENDFOR

    PRINT, 'Quarterly bar chart created'
    PRINT, ''

    ; Chart 3: Stacked Area Chart
    PRINT, '5. Creating Stacked Area Chart'
    PRINT, '-----------------------------'

    WINDOW, 2, XSIZE=1000, YSIZE=600

    ; Generate data for three products
    prod_a = 500 + 200 * SIN(2*!PI*dates/365) + RANDOMN(seed, n_days) * 50
    prod_b = 400 + 150 * COS(2*!PI*dates/365) + RANDOMN(seed, n_days) * 40
    prod_c = 300 + 100 * SIN(4*!PI*dates/365) + RANDOMN(seed, n_days) * 30

    prod_a = prod_a > 0
    prod_b = prod_b > 0
    prod_c = prod_c > 0

    ; Create cumulative data for stacking
    stack1 = prod_a
    stack2 = prod_a + prod_b
    stack3 = prod_a + prod_b + prod_c
    max_stack = MAX(stack3)

    PLOT, dates, stack3, $
          XTITLE='Day', YTITLE='Sales ($)', $
          TITLE='Stacked Area Chart - Product Sales', $
          THICK=2, $
          YRANGE=[0, max_stack*1.1], $
          /NODATA

    ; Fill areas
    ; Each polygon runs along the curve and back along y=0. Painting the
    ; tallest stack first lets the shorter ones cover its lower part.
    poly_x = [dates, REVERSE(dates)]
    baseline = REPLICATE(0, n_days)

    POLYFILL, poly_x, [stack3, baseline], COLOR=!GREEN
    POLYFILL, poly_x, [stack2, baseline], COLOR=!BLUE
    POLYFILL, poly_x, [stack1, baseline], COLOR=!RED

    ; Add legend
    XYOUTS, 10, max_stack*1.05, 'Product C', COLOR=!GREEN, /DATA
    XYOUTS, 10, max_stack*1.00, 'Product B', COLOR=!BLUE, /DATA
    XYOUTS, 10, max_stack*0.95, 'Product A', COLOR=!RED, /DATA

    PRINT, 'Stacked area chart created'
    PRINT, ''

    ; Chart 4: Scatter Plot with Correlation
    PRINT, '6. Creating Scatter Plot Matrix'
    PRINT, '------------------------------'

    WINDOW, 3, XSIZE=800, YSIZE=800

    ; Generate correlated variables
    marketing_spend = 100 + dates * 0.5 + RANDOMN(seed, n_days) * 20
    customer_count = 50 + dates * 0.3 + RANDOMN(seed, n_days) * 10

    ; Create 2x2 scatter plot matrix
    !P.MULTI = [0, 2, 2]

    ; Sales vs Marketing
    PLOT, marketing_spend, sales, PSYM=3, $
          XTITLE='Marketing Spend', YTITLE='Sales', $
          TITLE='Sales vs Marketing'

    ; /NORMAL coordinates span the whole window, so each r-label is
    ; positioned by hand inside its own panel.
    corr_sm = CORRELATE(marketing_spend, sales)
    XYOUTS, 0.1, 0.9, 'r=' + STRING(corr_sm, FORMAT='(F5.3)'), $
            /NORMAL, CHARSIZE=1.5

    ; Sales vs Customers
    PLOT, customer_count, sales, PSYM=3, $
          XTITLE='Customer Count', YTITLE='Sales', $
          TITLE='Sales vs Customers'

    corr_sc = CORRELATE(customer_count, sales)
    XYOUTS, 0.6, 0.9, 'r=' + STRING(corr_sc, FORMAT='(F5.3)'), $
            /NORMAL, CHARSIZE=1.5

    ; Marketing vs Customers
    PLOT, marketing_spend, customer_count, PSYM=3, $
          XTITLE='Marketing Spend', YTITLE='Customer Count', $
          TITLE='Marketing vs Customers'

    corr_mc = CORRELATE(marketing_spend, customer_count)
    XYOUTS, 0.1, 0.4, 'r=' + STRING(corr_mc, FORMAT='(F5.3)'), $
            /NORMAL, CHARSIZE=1.5

    ; Time series of all three
    ; BYTSCL rescales each series to a common range so they share one axis
    PLOT, dates, BYTSCL(sales), $
          XTITLE='Day', YTITLE='Normalized Value', $
          TITLE='Time Series Comparison', $
          THICK=2

    OPLOT, dates, BYTSCL(marketing_spend), COLOR=!RED, THICK=2
    OPLOT, dates, BYTSCL(customer_count), COLOR=!BLUE, THICK=2

    !P.MULTI = 0

    PRINT, 'Scatter plot matrix created'
    PRINT, ''

    ; Chart 5: Histogram and Box Plot
    PRINT, '7. Creating Distribution Plots'
    PRINT, '-----------------------------'

    WINDOW, 4, XSIZE=1000, YSIZE=500

    !P.MULTI = [0, 2, 1]

    ; Histogram
    hist = HISTOGRAM(sales, NBINS=30, LOCATIONS=bins)
    max_hist = MAX(hist)
    PLOT, bins, hist, PSYM=10, THICK=2, $
          XTITLE='Sales ($)', YTITLE='Frequency', $
          TITLE='Sales Distribution Histogram'

    ; Add statistics
    mean_sales = MEAN(sales)
    median_sales = MEDIAN(sales)
    OPLOT, [mean_sales, mean_sales], [0, max_hist], $
           COLOR=!RED, LINESTYLE=2, THICK=2
    OPLOT, [median_sales, median_sales], [0, max_hist], $
           COLOR=!BLUE, LINESTYLE=2, THICK=2

    XYOUTS, 0.1, 0.9, 'Mean', COLOR=!RED, /NORMAL
    XYOUTS, 0.1, 0.85, 'Median', COLOR=!BLUE, /NORMAL

    ; Box plot simulation
    ; Five-number summary: min, Q1, median, Q3, max. Min and max are read
    ; from the sorted copy rather than re-scanning the array.
    quartiles = FLTARR(5)
    sorted_sales = sales[SORT(sales)]
    n = N_ELEMENTS(sales)

    quartiles[0] = sorted_sales[0]
    quartiles[1] = sorted_sales[n/4]
    quartiles[2] = median_sales
    quartiles[3] = sorted_sales[3*n/4]
    quartiles[4] = sorted_sales[n-1]

    PLOT, [1], [mean_sales], PSYM=0, $
          XTITLE='', YTITLE='Sales ($)', $
          TITLE='Box Plot', $
          XRANGE=[0, 2], YRANGE=[quartiles[0]*0.9, quartiles[4]*1.1], $
          /NODATA, XTICKS=1, XTICKNAME=[' ', ' ']

    ; Draw box
    POLYFILL, [0.7, 1.3, 1.3, 0.7], $
              [quartiles[1], quartiles[1], quartiles[3], quartiles[3]], $
              COLOR=!CYAN

    ; Draw median line
    OPLOT, [0.7, 1.3], [quartiles[2], quartiles[2]], THICK=3

    ; Draw whiskers
    OPLOT, [1, 1], [quartiles[0], quartiles[1]], THICK=2
    OPLOT, [1, 1], [quartiles[3], quartiles[4]], THICK=2

    !P.MULTI = 0

    PRINT, 'Distribution plots created'
    PRINT, ''

    ; Chart 6: Heatmap
    PRINT, '8. Creating Correlation Heatmap'
    PRINT, '------------------------------'

    WINDOW, 5, XSIZE=600, YSIZE=600

    ; Create correlation matrix
    ; variables is n_days x 3: column k holds the k-th series, in the same
    ; order as labels.
    variables = [[sales], [marketing_spend], [customer_count]]
    labels = ['Sales', 'Marketing', 'Customers']
    n_vars = 3
    corr_matrix = FLTARR(n_vars, n_vars)

    FOR i = 0, n_vars-1 DO BEGIN
        FOR j = 0, n_vars-1 DO BEGIN
            corr_matrix[i,j] = CORRELATE(variables[*,i], variables[*,j])
        ENDFOR
    ENDFOR

    ; Display as heatmap
    ; CONTOUR is drawn WITHOUT /OVERPLOT so that it establishes its own
    ; axes, titles and data scaling for this window. With /OVERPLOT (IDL
    ; semantics) no axes or titles are drawn and the scaling of the previous
    ; plot (the box plot) is reused, which misplaces both the contours and
    ; the /DATA labels below.
    CONTOUR, corr_matrix, LEVELS=FINDGEN(11)/10, $
            /FILL, $
            XRANGE=[0, n_vars-1], YRANGE=[0, n_vars-1], $
            XTICKS=n_vars-1, XTICKNAME=labels, $
            YTICKS=n_vars-1, YTICKNAME=labels, $
            XTITLE='Variable', YTITLE='Variable', $
            TITLE='Correlation Heatmap'

    ; Add labels
    FOR i = 0, n_vars-1 DO BEGIN
        FOR j = 0, n_vars-1 DO BEGIN
            XYOUTS, i, j, STRING(corr_matrix[i,j], FORMAT='(F4.2)'), $
                   ALIGNMENT=0.5, /DATA, CHARSIZE=1.5
        ENDFOR
    ENDFOR

    PRINT, 'Correlation heatmap created'
    PRINT, ''

    ; DataFrame Statistics
    PRINT, '9. DataFrame Statistics'
    PRINT, '----------------------'

    sales_stats = df->Column('sales')->Describe()

    PRINT, 'Sales Statistics:'
    PRINT, '  Count: ', sales_stats.count
    PRINT, '  Mean: $', sales_stats.mean
    PRINT, '  Median: $', sales_stats.median
    PRINT, '  Std Dev: $', sales_stats.std
    PRINT, '  Min: $', sales_stats.min
    PRINT, '  Max: $', sales_stats.max
    PRINT, ''

    ; Export chart data
    PRINT, '10. Exporting Chart Data'
    PRINT, '-----------------------'

    lun = GET_LUN()
    OPENW, lun, 'chart_summary.csv'
    PRINTF, lun, 'metric,value'
    PRINTF, lun, 'mean_sales,' + STRING(sales_stats.mean)
    PRINTF, lun, 'median_sales,' + STRING(sales_stats.median)
    PRINTF, lun, 'std_sales,' + STRING(sales_stats.std)
    PRINTF, lun, 'corr_sales_marketing,' + STRING(corr_sm)
    PRINTF, lun, 'corr_sales_customers,' + STRING(corr_sc)
    PRINTF, lun, 'corr_marketing_customers,' + STRING(corr_mc)
    CLOSE, lun
    FREE_LUN, lun

    PRINT, 'Chart data exported to chart_summary.csv'
    PRINT, ''

    PRINT, '=== Charting Demo Complete ==='
    PRINT, ''
    PRINT, 'Created 6 visualization windows:'
    PRINT, '  Window 0: Line chart with moving average'
    PRINT, '  Window 1: Quarterly sales bar chart'
    PRINT, '  Window 2: Stacked area chart'
    PRINT, '  Window 3: Scatter plot matrix (2x2)'
    PRINT, '  Window 4: Histogram and box plot'
    PRINT, '  Window 5: Correlation heatmap'
    PRINT, ''
    PRINT, 'Exported Files:'
    PRINT, '  - sales_data.csv (raw data)'
    PRINT, '  - chart_summary.csv (statistics)'
ENDPRO

; Run the demo: invoke the charting_dataframe_demo procedure defined above
; when this file is executed as a script. The call takes no arguments.
charting_dataframe_demo